Setup

library(conos)
library(magrittr)
library(dplyr)
library(cacoa) # github.com/kharchenkolab/cacoa
## Warning: replacing previous import 'ape::where' by 'dplyr::where' when loading
## 'cacoa'
library(sccore)
library(scHelper) # github.com/rrydbirk/scHelper
library(qs)
library(ggplot2)
library(ggsci)
library(cowplot)
library(ggpubr)
library(STRINGdb)
library(reshape2)
library(ggforce)
library(ggpmisc)
library(RColorBrewer)
library(CRMetrics)
library(ggrepel)
library(circlize)
library(ComplexHeatmap)
library(stringr)
library(rstatix)
library(slingshot)
library(gamm4)
library(scITD)
library(ggraph)

## Comparisons
comp <- list(c("CTRL","MSA"),
             c("CTRL","PD"),
             c("MSA","PD"))

comp.long <- list(c("CTRL vs. MSA","CTRL vs. PD"),
                  c("CTRL vs. MSA","PD vs. MSA"),
                  c("CTRL vs. PD","PD vs. MSA"))

# Palettes

## Major celltypes
anno.neurons <- qread("anno_neurons.qs")
anno.neurons.major <- anno.neurons %>% 
  collapseAnnotation("GABA") %>% 
  collapseAnnotation("GLU") %>% 
  renameAnnotation("GABA", "Inh. neurons") %>% 
  renameAnnotation("GLU", "Exc. neurons") %>% 
  collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
anno <- qread("anno_major.qs")

anno.major <- anno[!names(anno) %in% names(anno.neurons.major) & !anno == "Neurons"] %>% 
  {factor(c(., anno.neurons.major))}

pal.major <- brewer.pal(n = 10, "Set1") %>% 
  c("lightblue3") %>% 
  setNames(levels(anno.major)[c(9,8,3,5,10,6:7,2,1,4)])
## Warning in brewer.pal(n = 10, "Set1"): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Condition
pal.major %<>% c(c("yellow", brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA")))

## Comparisons
pal.major %<>% c(brewer.pal(n = 10, "Paired")[c(6,8,10)] %>% setNames(c("CTRL vs. MSA","CTRL vs. PD","PD vs. MSA")))

## Neurons
pal.major %<>% c(setNames(c("navy",
                            "mediumblue",
                            "lightslateblue",
                            "lightskyblue",
                            "lightblue",
                            "lightseagreen",
                            "blue",
                            "cadetblue1",
                            "cyan",
                            "cyan4",
                            "aquamarine2",
                            "lightcoral",
                            "brown1",
                            "orange",
                            "darkorange4",
                            "lightgoldenrod3"), 
                          levels(anno.neurons)))

## Glia
anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

pal.major %<>% c(setNames(c("pink",
                            pal.major["Astrocytes"],
                            pal.major["Oligodendrocytes"],
                            "chartreuse",
                            "darkolivegreen",
                            "brown4",
                            "coral"),
                          levels(anno.glia)))

## Microglia
anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

pal.major %<>% c(setNames(c(pal.major["Microglia"],
                            "maroon4",
                            "magenta",
                            "pink3",
                            pal.major["PVMs"]),
                          levels(anno.micro)))

## Phago. assay
pal.major %<>% c(setNames(c("purple", 
                            "black",
                            pal.major[c("CTRL","PD","MSA")]), 
                          c("PBS",
                            "LPS",
                            "CTRL CSF",
                            "PD CSF",
                            "MSA CSF")))

## Houston assay
pal.major %<>% c(setNames(c(pal.major["PBS"],
                            pal.major["CTRL"],
                            "yellow3",
                            "orange",
                            pal.major["PD"],
                            "cyan4",
                            "navyblue",
                            pal.major["MSA"],
                            "pink3",
                            "red4"), 
                          c("Microglia medium",
                            "CTRL CSF, no dil.",
                            "CTRL CSF, 1:2 dil.",
                            "CTRL CSF, 1:4 dil.",
                            "PD CSF, no dil.",
                            "PD CSF, 1:2 dil.",
                            "PD CSF, 1:4 dil.",
                            "MSA CSF, no dil.",
                            "MSA CSF, 1:2 dil.",
                            "MSA CSF, 1:4 dil.")))

# Load major Conos object
con <- qread("con_major.qs", nthreads = 10)

tt <- Sys.time()

Define helper functions

getOntWithFamily <- function(cao, comp, name = "GSEA") {
  fams <- cao$test.results[[name]]$families
  
  df.all <- list(cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "up", p.adj = 1, q.value = 1),
                 cao$.__enclos_env__$private$getOntologyPvalueResults(name = name, genes = "down", p.adj = 1, q.value = 1)) %>% 
    bind_rows() %>% 
    mutate(logp = -log10(p.adjust),
           comp = comp)
  
  df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = name, subtype = "BP")
  
  return(list(df.all = df.all,
              df = df,
              fams = fams))
}
lianaCircos <- function(df,
                        top.interactions = 30, 
                        text.size = 1, 
                        pal = pal.major, 
                        cell.types = c("Astrocytes", 
                                       "Immune", 
                                       "Microglia",
                                       "Neurons",
                                       "OPCs",
                                       "Oligodendrocytes",
                                       "PVMs",
                                       "Pericytes/endothelial"),
                        big.gap = 5,
                        small.gap = 2,
                        arrow.width = 3,
                        link.ramp.rel = T,
                        link.sort = F,
                        scale = F,
                        arrow.head.width = 0.3,
                        arrow.head.length = 0.3,
                        link.ramp.col = c("navy", "grey", "firebrick")) {
  input_df <- df %>% 
    slice_max(order_by = score, n = top.interactions) %>% 
    mutate(target = paste0(target, " ")) %>% 
    mutate(source_lig = paste0(source, "|", ligand), 
           target_rec = paste0(target, "|", receptor))
  
  if (link.ramp.rel) {
    arr_wd <- rep(arrow.width, nrow(input_df))
  } else {
    arr_wd <- (((input_df$score - min(input_df$score))/(max(input_df$score) - min(input_df$score))) * (arrow.width)) + 1
  }
  
  # Colors and segments
  anno.col <- setNames(pal, 
                       cell.types) %>% 
    c(., "Oligodendrocytes " = unname(.["Oligodendrocytes"]))
  
  cell_cols <- anno.col[unique(c(unique(input_df$source), unique(input_df$target), "Oligodendrocytes "))]
  
  link_cols <- c()
  
  if (!link.ramp.rel) {
    for (i in input_df$source_lig) {
      link_cols <- c(link_cols, cell_cols[str_extract(i, 
                                                      "[^|]+")])
    }
  } else {
    input_df %<>% 
      arrange(score)
    
    df.down <- input_df %>% filter(score <= 0)
    link_down <- colorRampPalette(c(link.ramp.col[1], link.ramp.col[2]))(nrow(df.down))
    
    df.up <- input_df %>% filter(score > 0)
    link_up <- colorRampPalette(c(link.ramp.col[2], link.ramp.col[3]))(nrow(df.up))
    
    link_cols <- c(link_down, link_up)
  }
  
  segments <- unique(c(paste0(input_df$source, "|", input_df$ligand), 
                       paste0(input_df$target, "|", input_df$receptor)))
  
  grp <- str_extract(segments, "[^|]+") %>% 
    setNames(segments)
  
  # Redo colors
  cell_cols2 <- grp
  for (i in unique(grp)) {
    cell_cols2[cell_cols2 == i] <- cell_cols[i]
  }
  
  # Plot
  input_df %>% 
    select(source_lig, target_rec, score) %>%
    chordDiagram(directional = 1, 
                 group = grp,
                 scale = scale, 
                 diffHeight = 0.005, 
                 direction.type = c("arrows"),
                 link.arr.type = "triangle", 
                 annotationTrack = c(), 
                 preAllocateTracks = list(
                   list(track.height = 0.05),
                   list(track.height = 0.25),
                   list(track.height = 0.05)), 
                 big.gap = big.gap,
                 transparency = 1,
                 link.arr.lwd = arr_wd,
                 link.arr.col = link_cols,
                 link.arr.length = arrow.head.length,
                 link.arr.width = arrow.head.width,
                 small.gap = small.gap
    )
  
  circos.track(track.index = 2, panel.fun = function(x, y) {
    circos.text(CELL_META$xcenter, 
                CELL_META$ylim[1], 
                str_extract(CELL_META$sector.index, "[^|]+$"), 
                facing = "clockwise", 
                niceFacing = TRUE, 
                adj = c(0, 0.55), 
                cex = 1)
  }, bg.border = NA)
  
  # Split segments
  for (l in segments) {
    highlight.sector(l, track.index = 3, col = cell_cols2[l])
  }
  
  # Add ligand/receptor track
  ## Ligand
  highlight.sector(input_df$source_lig, 
                   track.index = 1, 
                   col = "black", 
                   text = "Ligands", 
                   cex = 1, 
                   text.col = "white", 
                   niceFacing = TRUE)
  ## Receptor
  highlight.sector(input_df$target_rec, 
                   track.index = 1, 
                   col = "white", 
                   text = "Receptors", 
                   cex = 1, 
                   text.col = "black", 
                   border = "black", 
                   niceFacing = TRUE)
  
  # Legends
  minmax <- input_df %>% 
    pull(score) %>% 
    {pmax(abs(min(.)), max(.))} %>% 
    formatC(digits = 1) %>% 
    as.numeric()
  
  col.range = c(-minmax, 0, minmax)
  lgd_links = Legend(at = col.range, 
                     col_fun = colorRamp2(col.range, link.ramp.col), 
                     title_position = "topleft", 
                     title = "Links")
  
  lgd_ct <- Legend(labels = unique(c(input_df$source, input_df$target)), 
                   title = "Cell type", 
                   type = "points",
                   legend_gp = gpar(col = "transparent"), 
                   background = cell_cols[unique(c(input_df$source, input_df$target))])
  
  lgd_list_vertical = packLegend(lgd_ct, lgd_links)
  
  draw(lgd_list_vertical, 
       just = c("left", "bottom"), 
       x = unit(5, "mm"), 
       y = unit(5, "mm"))
  
  circos.clear()
}
getTscanTrajectory <- function(con, anno) {
  requireNamespace("TSCAN", quietly = T)
  
  emb <- con$embedding[names(anno), ]
  anno %<>% .[rownames(emb)]
  
  cent.ids <- emb %>% 
    rownames() %>% 
    split(anno)
  
  centroids <- cent.ids %>% 
    lapply(\(cid) emb[cid, ]) %>% 
    lapply(colMeans) %>% 
    bind_rows() %>% 
    t() %>% 
    `colnames<-`(c("UMAP1","UMAP2"))
  
  mst <- centroids %>% 
    TSCAN::createClusterMST(clusters = NULL)
  
  line.data <- TSCAN::reportEdges(centroids, mst = mst, clusters = NULL)
  
  return(line.data)
}
plotGenePseudoBulk <- function(gene, cm.pseudo, legend = T) {
  idx <- cm.pseudo %>% 
    colnames() %>% 
    data.frame(id = .) %>% 
    mutate(condition = strsplit(id, "_|!!") %>% sget(1),
           ct = strsplit(id, "!!") %>% sget(2)) %>% 
    mutate(ord = order(condition, ct))
  
  x <- cm.pseudo %>% 
    .[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
    .[idx$ord]
  
  plot.dat <- x %>% 
    {data.frame(sample = names(.), 
                value = unname(.))} %>%
    mutate(anno = strsplit(sample, "!!") %>% 
             sget(2),
           condition = strsplit(sample, "!!|_") %>% 
             sget(1)) %>%
    mutate(anno = factor(anno))
  
  stat.test <- plot.dat %>% 
    group_by(anno) %>% 
    rstatix::wilcox_test(value ~ condition) %>% 
    rstatix::add_xy_position(x = "anno", step.increase = 0.1)
  
  p <- plot.dat %>% 
    ggplot(aes(anno, value)) +
    geom_boxplot(aes(fill = condition)) +
    theme_bw() +
    theme(line = element_blank(),
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          axis.ticks.x = element_line()) +
    labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = paste0(gene, " expression")) +
    scale_fill_manual(values = ggsci::pal_jama()(3)) +
    stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif") +
    stat_pvalue_manual(stat.test)
  
  if (!legend) p <- p + guides(fill = "none")
  
  return(p)
}

Figure 1

Load data

cao_msa <- qread("cao_major_msa.qs", nthreads = 10)
cao_pd <- qread("cao_major_pd.qs", nthreads = 10)
cao_dis <- qread("cao_major_dis.qs", nthreads = 10)

Figure 1b

con$plotGraph(groups = anno.major, 
              embedding = "UMAP_1_0.001_5",
              size = 0.1, 
              palette = pal.major, 
              font.size = 3, 
              raster = T, 
              show.labels = T,
              plot.na = F, 
              show.legend = T,
              legend.title = "Cell type", 
              alpha = 0.05) + 
  dotSize(3) +
  labs(x="UMAP1", y= "UMAP2") +
  theme(line = element_blank()) +
  ylim(c(-20, 15)) +
  xlim(c(-21, 11))

Figure 1c

cm.merged <- con$getJointCountMatrix()

markers <- c("AQP4","PTPRC","CSF1R","RBFOX3","SLC17A7","GAD1","PPP1R1B","MOG","VCAN","PDGFRB","MRC1")

dotPlot(markers, 
        cm.merged, 
        anno.major %>% 
          factor(., levels = sort(levels(.))[c(1,3,5,2,4,6,7:10)]), 
        cols = c("white","firebrick"), 
        gene.order = markers)

Figure 1d

spc <- con$getDatasetPerCell()

cpc <- spc %>% 
  as.character() %>% 
  grepl.replace(c("CTRL","MSA","PD")) %>% 
  `names<-`(names(spc))

con$plotGraph(groups = cpc, 
              embedding = "UMAP_1_0.001_5",
              size = 0.1, 
              alpha = 0.05,
              palette = pal.major, 
              font.size = 3, 
              raster = F, 
              show.labels = T,
              plot.na = F,
              mark.groups = F,
              show.legend = T) + 
  labs(x="UMAP1", y= "UMAP2", col = "") +
  theme(legend.position = "bottom",
        line = element_blank()) +
  dotSize(3)

Figure 1e

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  labs(x = "", y = "")

Figure 1f

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 7.5, col = "red") +
  labs(x = "", y = "")
## Warning: Removed 15 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Figure 1g

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 5.5, col = "red") +
  labs(x = "", y = "")

Figure 1h

cao_msa$cell.groups.palette <- pal.major
cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")

Figure 1i

cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")

cao_pd$cell.groups.palette <- pal.major

Figure 1j

cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

cao_dis$cell.groups.palette <- pal.major

Figure 2

Load data

cao <- qread("cao_neurons.qs", nthreads = 10)
cao_msa <- qread("cao_neurons_msa.qs", nthreads = 10)
cao_pd <- qread("cao_neurons_pd.qs", nthreads = 10)
cao_dis <- qread("cao_neurons_dis.qs", nthreads = 10)

Figure 2a

cao$plotEmbedding(groups = cao$cell.groups,
                  size = 0.1, 
                  palette = pal.major, 
                  font.size = 3, 
                  raster = T, 
                  show.labels = T,
                  plot.na = F,
                  alpha = 0.1) + 
  theme(line = element_blank()) +
  labs(x="largeVis1", y= "largeVis2")

Figure 2b

cm.merged <- cao$data.object$getJointCountMatrix()

markers <- c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR")

dotPlot(markers, 
        cm.merged, 
        anno.neurons, 
        cols = c("white","firebrick"), 
        gene.order = markers)

Figure 2c

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  labs(x = "", y = "")

Figure 2d

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 13.5, col = "red") +
  labs(x = "", y = "")

Figure 2e

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  labs(x = "", y = "")

Figure 2f

cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?
## Notch went outside hinges
## ℹ Do you want `notch = FALSE`?

Figure 2g

sample.groups <- cao$sample.groups

cao$estimateCellDensity(method = "graph")
cao$estimateDiffCellDensity(type = "subtract")

cao$plotCellDensity(show.cell.groups = F, show.legend = F, color.range = c(0, 0.00035))$CTRL +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank())

# MSA
cao$sample.groups <- sample.groups %>% 
  names() %>% 
  grepl.replace(c("CTRL","MSA","PD")) %>% 
  `names<-`(sample.groups %>% names()) %>% 
  .[. %in% c("CTRL","MSA")]
cao$target.level <- "MSA"
cao$estimateCellDensity(method = "graph", name = "msa.density")
cao$estimateDiffCellDensity(type = "subtract", name = "msa.density")
cao$plotCellDensity(show.cell.groups = F, name = "msa.density", show.legend = F, color.range = c(0, 0.00035))$MSA +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank())

# PD
cao$sample.groups <- sample.groups %>%
  names() %>%
  grepl.replace(c("CTRL","MSA","PD")) %>%
  `names<-`(sample.groups %>% names()) %>%
  .[. %in% c("CTRL","PD")]
cao$target.level <- "PD"
cao$estimateCellDensity(method = "graph", name = "pd.density")
cao$estimateDiffCellDensity(type = "subtract", name = "pd.density")
cao$plotCellDensity(show.cell.groups = F, name = "pd.density", show.legend = T, color.range = c(0, 0.00035))$PD +
  geom_circle(aes(x0 = 30.9, y0 = 10, r = 10)) +
  geom_circle(aes(x0 = 17.5, y0 = 24, r = 4), col = "cyan3") +
  theme(line = element_blank())

Figure 3

Load data

con.glia <- qread("con_oligo_astro_opc.qs", nthreads = 10)

cao_msa <- qread("cao_oligo_astro_opc_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_astro_opc_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_astro_opc_dis.qs", nthreads = 10)

Figure 3a

con.glia$plotGraph(groups = anno.glia, 
                   plot.na = F, 
                   size = 0.1,
                   palette = pal.major, 
                   font.size = 3, 
                   raster = T, 
                   show.labels = T, embedding = "largeVis_CPCA_AS01",
                   alpha = 0.05) + 
  labs(x = "largeVis1", y = "largeVis2") +
  theme(line = element_blank())

Figure 3b

cm.merged <- con.glia$getJointCountMatrix()

markers <- c("AQP4","TNC","MOG","LINC01608","SLC5A11","SGCZ","VCAN","OLIG2")

dotPlot(markers, 
        cm.merged, 
        anno.glia,
        cols = c("white","firebrick"), 
        gene.order = markers)

Figure 3c

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) +
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  geom_vline(xintercept = 6.5, col = "red") +
  labs(x = "", y = "")
## Warning: Removed 13 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Figure 3d

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "")
## Warning: Removed 59 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Figure 3e

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none",
        line = element_blank()) +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 6.5, col = "red") +
  labs(x = "", y = "")
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Figure 3f

cao_msa$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")

cao_pd$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")

cao_dis$plotExpressionShiftMagnitudes() + labs(y = "Normalized expression distance")

##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   10915084   583.0   24337685  1299.8   24337685  1299.8
## Vcells 3017281998 23020.1 7084113178 54047.5 5839045767 44548.4

Figure 4

Figure 4a,b

cao_msa <- qread("cao_astro_msa.qs", nthreads = 10)
cao_pd <- qread("cao_astro_pd.qs", nthreads = 10)
cao_dis <- qread("cao_astro_dis.qs", nthreads = 10)

df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df.all") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower()))
## Loading required namespace: DOSE
## 
# Select relevant pathways
p.ids <- c("GO:0050821", "GO:0050728", "GO:0006986", "GO:0030168", "GO:0030001", "GO:1904707", "GO:0140053", "GO:0006486", "GO:0010717", "GO:0031113", "GO:0032675", "GO:0061041", "GO:0071222", "GO:0045766", "GO:0006487", "GO:0097501", "GO:0042776", "GO:0045047", "GO:0044794", "GO:0035633", "GO:0097242", "GO:0006120", "GO:0042026", "GO:0006979", "GO:0071222", "GO:0017015", "GO:0009611", "GO:1901201", "GO:0042594", "GO:0097501", "GO:0006123", "GO:0006909", "GO:0007179", "GO:0006120", "GO:0097501")

Figure 4a

ct <- "AS_homeostatic"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  as.data.frame() %>% 
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Figure 4b

ct <- "AS_reactive"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("AS_", strsplit(Group, "_") %>% sget(1) %>% tolower())) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Figure 4c

cao_msa <- qread("cao_oligo_msa.qs", nthreads = 10)
cao_pd <- qread("cao_oligo_pd.qs", nthreads = 10)
cao_dis <- qread("cao_oligo_dis.qs", nthreads = 10)

df.all <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df.all") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .)))

# Select relevant pathways
p.ids <- c("GO:0071345", "GO:0045596", "GO:0060271", "GO:0048709", "GO:0006986", "GO:0016032", "GO:0060271", "GO:0042026", "GO:0006986", "GO:0120034", "GO:0071346", "GO:1904262", "GO:0042776", "GO:0061077", "GO:0050821", "GO:0045047", "GO:0042776", "GO:0050821", "GO:0045047", "GO:0007042", "GO:0042776", "GO:0061077", "GO:0045047", "GO:0042776", "GO:0045047", "GO:2000249", "GO:0002474", "GO:0042026", "GO:0002040", "GO:0046330", "GO:0043542", "GO:0030036", "GO:0032981", "GO:0007042")

ct <- "OL_SLC5A11"

df.sel <- df.all %>% 
  filter(Group == ct)

p1 <- ggplot(df.sel, aes(NES, logp, col = comp, shape = comp)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.8) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)", shape = "Comparison", col = "Comparison") +
  dotSize(3)

df <- Map(getOntWithFamily, list(cao_msa, cao_pd, cao_dis), c("CTRL vs. MSA", "CTRL vs. PD", "PD vs. MSA")) %>% 
  lget("df") %>% 
  bind_rows() %>% 
  mutate(Group = paste0("OL_", strsplit(Group, "_") %>% sget(2) %>% gsub("SCGZ", "SGCZ", .))) %>% 
  filter(Group == ct,
         ID %in% p.ids)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>%
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group, comp) %>%
  group_by(comp, Group) %>% 
  group_split() %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = comp)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

Figure 4d,e

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.glia)]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.glia %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors

Figure 4d

# For DE between visits
genes <- "TLR1"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(condition = strsplit(id, "_|!!") %>% sget(1),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(condition, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>%
  mutate(anno = strsplit(sample, "!!") %>% 
           sget(2),
         condition = strsplit(sample, "!!|_") %>% 
           sget(1)) %>% 
  filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>% 
  mutate(anno = factor(anno))

stat.test <- plot.dat %>% 
  group_by(anno) %>% 
  rstatix::wilcox_test(value ~ condition) %>% 
  rstatix::add_xy_position(x = "anno", step.increase = 0.1, fun = "mean_sd", scales = "free")

plot.dat %>% 
  ggplot(aes(anno, value)) +
  geom_boxplot(aes(fill = condition)) +
  theme_bw() +
  theme(line = element_blank(),
        axis.ticks.x = element_line(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "TLR1 expression") +
  scale_fill_manual(values = ggsci::pal_jama()(3)) +
  stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 8.5e2) +
  stat_pvalue_manual(stat.test)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill

Figure 4e

# For DE between visits
genes <- "LRP1"

idx <- cm.pseudo %>% 
  colnames() %>% 
  data.frame(id = .) %>% 
  mutate(condition = strsplit(id, "_|!!") %>% sget(1),
         ct = strsplit(id, "!!") %>% sget(2)) %>% 
  mutate(ord = order(condition, ct))

x <- cm.pseudo %>% 
  .[match(genes, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
  .[idx$ord]

plot.dat <- x %>% 
  {data.frame(sample = names(.), 
              value = unname(.))} %>%
  mutate(anno = strsplit(sample, "!!") %>% 
           sget(2),
         condition = strsplit(sample, "!!|_") %>% 
           sget(1)) %>% 
  filter(anno %in% c("OL_LINC01608", "OL_SGCZ", "OL_SLC5A11")) %>% 
  mutate(anno = factor(anno))

stat.test <- plot.dat %>% 
  group_by(anno) %>% 
  rstatix::wilcox_test(value ~ condition) %>% 
  rstatix::add_xy_position(x = "anno", step.increase = 0.1)

plot.dat %>% 
  ggplot(aes(anno, value)) +
  geom_boxplot(aes(fill = condition)) +
  theme_bw() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        axis.ticks.x = element_line()) +
  labs(x = "", y = "Normalized pseudobulk expression", fill = "", title = "LRP1 expression") +
  scale_fill_manual(values = ggsci::pal_jama()(3)) +
  stat_compare_means(aes(fill = condition), method = "kruskal", label = "p.signif", label.y = 175) +
  stat_pvalue_manual(stat.test) +
  ylim(c(0, 180))
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", :
## Ignoring unknown aesthetics: fill
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bracket()`).

Figure 4f

For calculation of data, see Liana.ipynb.

dat <- read.delim("liana_res.csv",
                  sep = ",",
                  header = T) %>%
  mutate(group = strsplit(sample, "_") %>%
           sget(1))

dat.plot <- dat %>% 
  dplyr::rename(ligand = ligand_complex,
         receptor = receptor_complex,
         score = lrscore) %>%
  filter(target == "Oligodendrocytes",
         ligand %in% c("BMP1", "SERPINE2", "PSAP", "APOE", "APP", "SERPING1"),
         receptor %in% c("LRP1", "BMPR1A", "LRP4")) %>%
  group_by(group, ligand, receptor, source, target) %>% 
  summarize(score = mean(score)) %>% 
  ungroup() %>% 
  arrange(group, source, target, ligand, receptor)
## `summarise()` has grouped output by 'group', 'ligand', 'receptor', 'source'.
## You can override using the `.groups` argument.
dat.msa <- dat.plot %>% 
  filter(group == "MSA",
         score > 0.39) %>% 
  mutate(lrst = paste0(ligand, receptor, source, target))

dat.pd <- dat.plot %>% 
  mutate(lrst = paste0(ligand, receptor, source, target)) %>% 
  filter(group == "PD",
         lrst %in% dat.msa$lrst)

dat.rel <- dat.msa %>% 
  mutate(score = score - dat.pd$score)

dat.rel %>% 
  lianaCircos()

Figure 5 & EDF13

Load data

cao <- qread("cao_micro_pvm.qs", nthreads = 10)
cao_msa <- qread("cao_micro_pvm_msa.qs", nthreads = 10)
cao_pd <- qread("cao_micro_pvm_pd.qs", nthreads = 10)
cao_dis <- qread("cao_micro_pvm_dis.qs", nthreads = 10)
sample.groups <- cao$sample.groups

Figure 5a

cao$plotEmbedding(groups = anno.micro,
                  size = 0.5, 
                  palette = pal.major, 
                  font.size = 3, 
                  raster = T, 
                  mark.groups = T,
                  plot.na = F,
                  alpha = 0.2) + 
  labs(x="UMAP1", y= "UMAP2") +
  theme(line = element_blank())

Figure 5b

cm.merged <- cao$data.object$getJointCountMatrix()

markers <- c("AIF1","F13A1","MRC1","CD163","CD74")

dotPlot(markers, 
        cm.merged, 
        cao$cell.groups, 
        cols = c("white","firebrick"))

Figure 5c-e

anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()
anno.sel2 <- anno.sort[!anno.sort %in% c("MIC_intermediate2", "MIC_activated")] %>% factor()

ldata1 <- getTscanTrajectory(cao$data.object, anno.sel)
ldata2 <- getTscanTrajectory(cao$data.object, anno.sel2)

ldata <- rbind(ldata1, ldata2)

Figure 5c

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  ggplot() + 
  geom_point(aes(UMAP1, UMAP2, col = annotation), size = 0.3) +
  geom_line(data = ldata, mapping=aes(UMAP1, UMAP2, group = edge), linewidth = 1) +
  theme_bw() + 
  theme(legend.position = "right",
        line = element_blank()) + 
  labs(col = "", x = "UMAP1", y = "UMAP2") +
  scale_colour_manual(values = pal.major) +
  dotSize(3)

Figures 5d,e & EDF14

Prepare data

emb <- cao$data.object$embedding %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  .[rownames(.) %in% names(anno.sel2), ]

sds_obj <- slingshot(emb, 
                     anno.sel2, 
                     start.clus = "MIC_steady-state",
                     stretch = 0
)

sds <- as.SlingshotDataSet(sds_obj)

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]

Here, we show the code for running the generalized additive mixed model. It takes a significant amount of time to run, we provide data in pseudotime_micro_l2.qs.

mat <- cao$data.object$getJointCountMatrix() %>%
  .[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>% 
  .[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               condition = cao$data.object$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(1),
                               sample = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
  {message("Splitting"); split(., variable)}
res <- mt %>% 
  sccore::plapply(\(x) {
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(anova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
  .[!sapply(., is.null)]

qsave(res, "pseudotime_micro_l2.qs")

Figure 5d

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  mutate(., pseudotime = pseudotime[rownames(.)]) %>%
  ggplot() +
  geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
  geom_line(data = ldata2, aes(UMAP1, UMAP2, group = edge), size = 1) +
  theme_bw() +
  theme(line = element_blank()) +
  scale_color_gradient(low = "navyblue", high = "orange") +
  labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Figure 5e

Please note, row order may change per iteration.

res <- qread("pseudotime_micro_l2.qs")

rsq.pseudo <- res %>% 
  sapply(\(gene) gene$r.sq[1]) %>% 
  sort(decreasing = T)

res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]

cm.merged <- cao$data.object$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)

Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        row_names_gp = grid::gpar(fontsize = 5))

##              used  (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   13219434   706   30800521  1645.0   30800521  1645.0
## Vcells 1571546063 11990 5440650121 41508.9 8457159004 64523.1

Extended Data Figure 14

We provide this figure here since all data are already loaded.

cpc <- pseudotime %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  factor()

res[rownames(Smooth)] %>% 
  lget("residuals") %>% 
  lapply(as.data.frame) %>% 
  lapply(setNames, "residuals") %>% 
  lapply(mutate, group = cpc, pseudotime = pseudotime) %>% 
  data.table::rbindlist(idcol = "gene") %>% 
  ggplot(aes(pseudotime, residuals, col = group)) +
  geom_smooth() +
  theme_bw() +
  facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Figure 5f

cao_msa$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) +
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nMSA")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank())

Figure 5g

cao_pd$plotCellLoadings(show.pvals = F, 
                        alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nCTRL",0,"1\nPD")) +
  geom_vline(xintercept = 4.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank())
## Warning: Removed 273 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

Figure 5h

cao_dis$plotCellLoadings(show.pvals = F, 
                         alpha = 0)$data %>% 
  ggplot(aes(ind, values, fill = ind)) + 
  geom_hline(yintercept = 0, col = "black") +
  geom_violin() + 
  coord_flip() + 
  theme_bw() + 
  scale_fill_manual(values = pal.major) + 
  theme(legend.position = "none") +
  scale_y_continuous(breaks = c(-1,0,1), 
                     limits = c(-1,1),
                     labels = c("-1\nPD",0,"1\nMSA")) +
  geom_vline(xintercept = 3.5, col = "red") +
  labs(x = "", y = "") +
  theme(line = element_blank())
## Warning: Removed 311 rows containing non-finite outside the scale range
## (`stat_ydensity()`).

##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   12785619   682.9   30800521  1645.0   30800521  1645.0
## Vcells 1426359447 10882.3 4352520097 33207.1 8457159004 64523.1

Figure 5j

We provide combined data from the RNAscope experiments as a single file res.qs.

res <- qread("RNAscope.qs") %>% 
  filter(target == "AIF1", Ch1NumSpots > 0) %>% 
  arrange(file)

area <- read.table("RNAscope_areas.tsv", sep = "\t", header = T) %>% # Million pixels
  mutate(file = paste(File.name, Tissue, sep = "")) %>% 
  filter(file %in% res$file) %>% 
  arrange(file) %>% 
  mutate(rel_px = px.2 / 1E6)
p1 <- res %>% 
  group_by(group, target, file) %>% 
  summarize(spots = mean(Ch1NumSpots)) %>% 
  ggplot(aes(group, spots, fill = group)) + 
  geom_boxplot() +
  geom_jitter(width = 0.2) +
  theme_bw() +
  labs(y = "Mean no. spots per double-positive cell", x = "") +
  stat_compare_means(method = "kruskal.test", label.y = 115) +
  stat_compare_means(comparisons = comp, method = "wilcox.test") +
  theme(line = element_blank()) +
  guides(fill = "none") +
  scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
p2 <- res %>% 
  group_by(group, target, file) %>% 
  summarize(no_cells = n()) %>% 
  as.data.frame() %>% 
  arrange(file) %>% 
  mutate(rel_cells = no_cells / area$rel_px) %>% 
  ggplot(aes(group, rel_cells, fill = group)) + 
  geom_boxplot(outliers = F) +
  geom_jitter(width = 0.2) +
  theme_bw() +
  stat_compare_means(method = "kruskal.test", label.y = 17) +
  stat_compare_means(comparisons = comp, method = "wilcox.test") +
  labs(y = "No. double-positive cells\nNormalized to area", x = "") +
  theme(line = element_blank()) +
  guides(fill = "none") +
  scale_fill_manual(values = pal.major)
## `summarise()` has grouped output by 'group', 'target'. You can override using
## the `.groups` argument.
plot_grid(plotlist = list(p1, p2), ncol = 1)

Figure 5k

fams <- cao_msa$test.results[["GSEA"]]$families

df.all <- list(cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
               cao_msa$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>% 
  bind_rows() %>% 
  mutate(logp = -log10(p.adjust))

# Relevant pathways
p.ids <- c("GO:0060760", "GO:0002230", "GO:0019915", "GO:1904646", "GO:0042775", "GO:0019646", "GO:0033108", "GO:0035455", "GO:0061077", "GO:0006914", "GO:0042026", "GO:0034340", "GO:0051607", "GO:0060337", "GO:0035455", "GO:0002684", "GO:0036037", "GO:0048514")

df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>% 
  mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>% 
  filter(ID %in% p.ids)

df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))

p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)") +
  dotSize(3)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  mutate(Group = factor(Group, levels = sort(unique(Group)))) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group) %>%
  dplyr::slice(seq(20)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   12258503  654.7   30800521  1645.0   30800521  1645.0
## Vcells 1282522615 9784.9 3482016078 26565.7 8457159004 64523.1

Figure 5l

fams <- cao_pd$test.results[["GSEA"]]$families

df.all <- list(cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "up", p.adj = 1, q.value = 1),
               cao_pd$.__enclos_env__$private$getOntologyPvalueResults(name = "GSEA", genes = "down", p.adj = 1, q.value = 1)) %>% 
  bind_rows() %>% 
  mutate(logp = -log10(p.adjust))

# Select relevant pathways
p.ids <- c("GO:0042776", "GO:0034341", "GO:0001916", "GO:0002503", "GO:0019885", "GO:0016042", "GO:0001909", "GO:0051085", "GO:0045824", "GO:0044000", "GO:0019885", "GO:0044242", "GO:0001944", "GO:0034976", "GO:0019646", "GO:0009205", "GO:0019883", "GO:0070972", "GO:0002474")

df <- cacoa:::getOntologyFamilyChildren(df.all, fams=fams, type = "GSEA", subtype = "BP") %>% 
  mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .)) %>% 
  filter(ID %in% p.ids)

df.all %<>% mutate(Group = paste0("MIC_", tolower(Group)) %>% gsub("MIC_pvms", "PVMs", .))

p1 <- ggplot(df.all, aes(NES, logp, col = Group, shape = Group)) +
  geom_point(data = ~filter(., p.adjust > 0.05), size = 0.1, alpha = 0.5, col = "black") +
  geom_point(data = ~filter(., p.adjust <= 0.05), size = 2, alpha = 0.5) +
  theme_bw() +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank()) +
  labs(y = "-log10(adj. p)") +
  dotSize(3)

p2 <- df %>% 
  filter(p.adjust <= 0.05, NES > 0) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for up-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

p3 <- df %>% 
  filter(p.adjust <= 0.05, NES < 0) %>% 
  select(Description, NES, Group) %$% 
  split(., Group) %>% 
  lapply(dplyr::slice, seq(5)) %>% 
  bind_rows() %>% 
  dplyr::slice(seq(25)) %>% 
  mutate(., x = 0, y = rev(seq(nrow(.)))) %>% 
  ggplot(aes(x, y, label = Description, col = Group)) + 
  geom_text(hjust = 0) +
  theme_void() +
  xlim(0, 1) +
  labs(title = "Selected terms for down-regulated genes") +
  scale_color_manual(values = pal.major) +
  guides(col = "none")

plot_grid(plotlist = list(p3, p1, p2), ncol = 3, rel_widths = c(0.9, 1, 0.9))

##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   11637391  621.6   30800521  1645.0   30800521  1645.0
## Vcells 1080376611 8242.7 3482016078 26565.7 8457159004 64523.1

Figure 6

Figures 6c,e,f where made with GraphPad Prism. Data are available in Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen experiment) or HOUSTON.tsv (Houston experiment).

Figure 6b

dat <- read.delim("Phagocytosis_assay_triplicates_raw_data_HH.tsv", 
                  header = T, dec = ",") %>% 
  tidyr::pivot_longer(names_to = "group", cols = -1, values_to = "value") %>% 
  mutate(group = gsub(".1|.2", "", group) %>% factor(labels = c("PBS","LPS","MSA CSF","CTRL CSF","PD CSF"))) %>% 
  mutate(group = factor(group, levels = c("PBS","LPS","CTRL CSF","MSA CSF","PD CSF"))) %>% 
  group_by(Time, group) %>% 
  summarize(mean = mean(value, na.rm = T), 
            sd = sd(value, na.rm = T))
## `summarise()` has grouped output by 'Time'. You can override using the
## `.groups` argument.
dat %>% 
  ggplot(aes(Time, mean, group=group, color=group)) + 
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd), width=.1) +
  geom_point(size = 0.5) +
  theme_bw() +
  labs(x = "Time (hours)", y = "pHrodo-labelled E. coli uptake - intensity ratio [AU]", col = "Stimulation") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank())

Figure 6d

dat.raw <- read.table("Cytokine_summary.tsv", header = TRUE, sep = "\t", dec = ",")

dat.tmp <- dat.raw %>%
  melt(id.vars = c("Sample","Group","Condition")) %>% 
  mutate(variable = variable %>% 
           as.character() %>% 
           gsub(".", "-", ., fixed = T) %>% 
           as.factor()) %>%
  filter(Condition == "Media", !Group %in% c("PBS","LPS")) %>%
  filter(Condition == "Media") %>%
  mutate(Group = Group %>% factor(levels = c("CTRL","MSA","PD")),
         variable = variable %>% factor())

dat.stat <- dat.raw %>% 
  select(!IL.8) %>% 
  mutate(Group = factor(Group, levels = c("PBS","LPS","CTRL","MSA","PD"))) %>% 
  filter(!Group %in% c("PBS","LPS"))

res.il10 <- FSA::dunnTest(IL10 ~ Group, data = dat.stat %>% {`colnames<-`(., gsub(".", "", colnames(.), fixed = T))}, method = "bh")
## Registered S3 methods overwritten by 'FSA':
##   method       from
##   confint.boot car 
##   hist.boot    car
stat.test <- dat.tmp %>% 
  filter(variable == "IL-10") %>% 
  select(Group, value) %>% 
  as.data.frame() %>%
  rstatix::wilcox_test(value ~ Group) %>% 
  mutate(p = res.il10$res$P.unadj,
         p.adj = res.il10$res$P.adj %>% formatC(digits = 2),
         p.adj.signif = c("*","ns","ns")) %>% 
  rstatix::add_xy_position(x = "Group")

dat.tmp %>% 
  filter(variable == "IL-10") %>%
  ggplot(aes(Group, value)) + 
  geom_point(aes(col = Group), size = 3, position = position_jitter(width = 0.3)) + 
  theme_bw() +
  theme(line = element_blank()) +
  stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0.01, backet.nudge.y = 3, hide.ns = F) +
  scale_color_manual(values = pal.major) +
  labs(x = "", y = "IL-10 pg/mL") +
  guides(col = "none")

Figures 6g-i

dat.melt <- read.table("Microglia+CSF_samples_aSyn_sCD163.tsv", header = T, sep = "\t", dec = ",") %>%  
  melt(id.vars = c("diagnosis", "sex")) %>% 
  mutate(diagnosis = factor(diagnosis, levels = c("CTRL","MSA","IPD"), labels = c("CTRL","MSA","PD")))

Figure 6g

dat.melt %>% 
  filter(variable == "sCD163_ng.mL_CSF") %>% 
  ggplot(aes(diagnosis, value)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_bw() + 
  labs(x = "", y = "sCD163 ng/ml", fill = "Diagnosis") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
  guides(fill = "none") +
  theme(line = element_blank())

Figure 6h

dat.melt %>% 
  filter(variable == "aSyn_pg.mL") %>% 
  ggplot(aes(diagnosis, value)) +
  geom_boxplot(aes(fill = diagnosis)) +
  theme_bw() + 
  labs(x = "", y = "aSyn pg/ml", fill = "Diagnosis") +
  scale_fill_manual(values = pal.major) +
  stat_compare_means(method = "t.test", comparisons = list(c("CTRL","MSA"), c("MSA","PD"), c("CTRL","PD"))) +
  guides(fill = "none") +
  theme(line = element_blank())
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_signif()`).

Figure 6i

dat.melt %>% 
  filter(variable %in% c("aSyn_pg.mL","sCD163_ng.mL_CSF")) %>%
  select(-sex) %>% 
  mutate(id = rep(seq(63), 2)) %>% 
  tidyr::pivot_wider(names_from = variable, values_from = value) %>% 
  ggplot(aes(aSyn_pg.mL, sCD163_ng.mL_CSF)) +
  geom_point(aes(col = diagnosis)) +
  theme_bw() +
  labs(x = "aSyn pg/ml", y = "sCD163, ng/ml", col = "") +
  geom_smooth(method = MASS::rlm, se = FALSE, color = "black") +
  stat_poly_eq(aes(label = paste(after_stat(rr.label), after_stat(p.value.label), sep = "*\", \"*")), color = "black") +
  scale_color_manual(values = pal.major) +
  theme(line = element_blank())
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

Figure 6j

Here, we use data from Rydbirk et al. on differentially expressed proteins in the soluble fraction of CSF pools.

dat <- read.delim("Soluble_fraction.txt")

msa.dep <- dat %>% 
  filter(Disease.group == "MSA") %>% 
  pull(Gene.name)

pd.dep <- dat %>% 
  filter(Disease.group == "PD") %>% 
  pull(Gene.name)

overlap <- intersect(msa.dep, pd.dep)

highlight_proteins <- c("CTSS", "FCGR2A", "LAMP1", "CTSD", "CLTC")

msa.dep %<>% .[!. %in% overlap]
pd.dep %<>% .[!. %in% overlap]
overlap %<>% .[!. %in% highlight_proteins]

string_db <- STRINGdb$new(
  version = "12.0",       # or latest available
  species = 9606,         # 9606 = Homo sapiens
  score_threshold = 400,  # confidence score cutoff
  input_directory = ""
)

genes <- c(msa.dep, pd.dep, overlap)

mapped_genes <- string_db$map(
  data.frame(gene = genes),
  "gene",
  removeUnmappedRows = TRUE
)
## Warning:  we couldn't map to STRING 5% of your identifiers
# Get interactions
interactions <- string_db$get_interactions(mapped_genes$STRING_id)

# Convert to igraph object
ppi_graph <- graph_from_data_frame(interactions, directed = FALSE)

# Create label mapping: STRING ID to gene name
string_to_gene <- mapped_genes %>%
  select(STRING_id, gene) %>%
  distinct()

# Add labels to graph nodes
V(ppi_graph)$label <- string_to_gene$gene[match(V(ppi_graph)$name, string_to_gene$STRING_id)]

custom_colors <- c(
  "CTSS" = "firebrick",
  "FCGR2A" = "purple2",
  "LAMP1" = "purple2",
  "CTSD" = "navyblue",
  "CLTC" = "navyblue",
  setNames(rep("tomato", length(msa.dep)), msa.dep),
  setNames(rep("steelblue", length(pd.dep)), pd.dep),
  setNames(rep("orchid", length(overlap)), overlap))

# Apply color mapping to node attributes
V(ppi_graph)$node_color <- custom_colors[V(ppi_graph)$label]

# Get node labels for each edge endpoint
edge_ends <- ends(ppi_graph, es = E(ppi_graph), names = FALSE)
source_labels <- V(ppi_graph)$label[edge_ends[, 1]]
target_labels <- V(ppi_graph)$label[edge_ends[, 2]]

# Assign edge width
E(ppi_graph)$edge_width <- ifelse(
  source_labels %in% highlight_proteins & target_labels %in% highlight_proteins,
  0.5, 0.1
)

# Compute node sizes based on label
V(ppi_graph)$node_size <- sapply(V(ppi_graph)$label, \(x) if (x %in% overlap) 3 else if (x %in% highlight_proteins) 5 else 2)

V(ppi_graph)$label_type <- ifelse(
  V(ppi_graph)$label %in% highlight_proteins,
  "highlight", "other"
)

# Plot the network with size and color
ggraph(ppi_graph, layout = "fr") +
  geom_edge_link(aes(width = I(edge_width)), alpha = 0.4) +
  geom_node_point(aes(color = I(node_color), size = I(node_size)), show.legend = FALSE) +
  geom_node_label(data = function(x) { x[x$label_type == "highlight", ] },
                  aes(label = label), size = 3, label.padding = unit(0.15, "lines"), repel = TRUE, show.legend = F) +
  geom_node_text(data = function(x) { x[x$label_type == "other", ] },
                 aes(label = label), size = 2, repel = TRUE, show.legend = F) +
  theme_void()

Extended Data Figure 1

Load data

samples <- anno.major %>% 
  names() %>% 
  strsplit("!!") %>% 
  sget(1) %>% 
  unique()

crm <- qread("crm.qs",
             nthreads = 10)

1a-f

mtp <- crm$selectMetrics(ids = c(1:4,6,19))

mtp %>% 
  lapply(\(met) crm$plotSummaryMetrics(metrics = met, comp.group = "sample", second.comp.group = "group") + 
           scale_fill_manual(values = pal.major) +
           labs(x = "") + 
           theme(axis.text.x = element_blank(),
                 axis.ticks.x = element_blank())) %>% 
  plot_grid(plotlist = ., labels = letters[1:6], ncol = 2)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

1g

crm$plotSummaryMetrics(metrics = crm$selectMetrics(ids = 20), comp.group = "sample", second.comp.group = "group") + 
  scale_fill_manual(values = pal.major) +
  labs(x = "") + 
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "right")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

1h

crm$plotFilteredCells(doublet.method = "doubletdetection", depth.cutoff = 5e2, size = 0.2, alpha = 0.1) +
  dotSize(3)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

1i

crm$plotFilteredCells(type = "bar", doublet.method = "doubletdetection", depth.cutoff = 5e2)$data %>%  
  filter(sample != "MSA_1406") %>%
  mutate(sample = strsplit(sample, "_") %>% 
           sget(1)) %$% 
  split(., sample) %>% 
  lapply(\(x) split(x, x$filter)) %>% 
  lapply(lapply, \(df) mutate(df, sample = paste0(sample, seq(nrow(df))))) %>% 
  lapply(bind_rows) %>% 
  bind_rows() %>%
  mutate(sample = factor(sample, levels = c(paste0("CTRL", seq(10)), paste0("MSA", seq(7)), paste0("PD", seq(12))))) %>% 
  ggplot(aes(sample, pct, fill = filter)) + 
  geom_bar(stat = "identity") + 
  geom_text_repel(aes(label = sprintf("%0.2f", round(pct, digits = 2))), 
                  position = position_stack(vjust = 0.5), 
                  direction = "y", 
                  size = 2.5) + 
  crm$theme + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + 
  labs(x = "", y = "Percentage cells filtered") +
  theme(legend.position = "bottom")

Extended Data Figure 2

These figures cannot be reproduced here due to GDPR. Leaving the code for visibility.

Load data

crm <- qread("crm.qs", nthreads = 10)

2a

crm$plotCbCells(samples = crm$metadata$sample %>% as.character() %>% .[. != "MSA_1406"]) +
  scale_x_discrete(labels = c(paste("CTRL", seq(10)), paste("MSA", seq(7)), paste("PD", seq(12)))) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

2b

crm$plotCbAmbGenes() +
  theme(line = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        axis.ticks.x = element_line())
##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   18083933  965.8   50794518  2712.8   50794518  2712.8
## Vcells 1042913844 7956.9 2785612863 21252.6 8457159004 64523.1

2c

c("RBFOX3","MOG","VCAN","AQP4","CSF1R","PDGFRB","PTPRC","MRC1","GAD1","SLC17A7","PPP1R1B") %>%
  sort() %>% 
  lapply(function(g) con$plotGraph(embedding = "UMAP", gene=g, title=g, size=0.1, plot.na = F, palette = colorRampPalette(c("grey90","firebrick"))) + theme(line = element_blank())) %>%
  cowplot::plot_grid(plotlist=., ncol=2)

Extended Data Figure 3

con$embedding <- con$embeddings$UMAP

sample.groups <- con$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","MSA","PD")), .)

cao <- Cacoa$new(data.object = con, 
                 sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "DISEASE") %>% 
                   setNames(names(sample.groups)), 
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 n.cores = 32)

meta <- read.delim("metadata.tsv", sep = "\t") %>%
  filter(sample %in% names(con$samples)) %>%
  tibble::column_to_rownames("sample") %>% 
  dplyr::select(-condition)

meta %<>%
  mutate(sample = rownames(.))

spc <- con$getDatasetPerCell()

mpc <- spc %>% 
  data.frame(sample = ., cid = names(.))

for (cc in colnames(meta)) {
  mpc[[cc]] <- meta[[cc]][match(mpc$sample, meta$sample)]
}

mpc$subtype[mpc$subtype == ""] <- NA
p <- con$plotGraph(colors = setNames(mpc$age, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Age", size = 0.3, alpha = 0.3) +
        scale_color_gradient(low = "grey", high = "firebrick") +
        theme(line = element_blank()) +
  labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(colors = setNames(mpc$pmi, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "PMI", size = 0.3, alpha = 0.3) +
        scale_color_gradient(low = "grey", high = "firebrick") +
        theme(line = element_blank()) +
  labs(col = "Hours")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(colors = setNames(mpc$disease_duration, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, title = "Disease duration", size = 0.3, alpha = 0.3) +
        scale_color_gradient(low = "grey", high = "firebrick") +
        theme(line = element_blank()) +
  labs(col = "Years")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p

p <- con$plotGraph(groups = setNames(mpc$sample, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sample", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$brain_bank, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Brain bank", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$sex, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Sex", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$subtype, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Subtype", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$extract, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Extract", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

p <- con$plotGraph(groups = setNames(mpc$flowcell, mpc$cid), plot.na = F, embedding = "UMAP_1_0.001_5", mark.groups = F, show.legend = T, shuffle.colors = T, title = "Flowcell", size = 0.3, alpha = 0.3) +
        theme(line = element_blank()) + dotSize(3)

p

Extended Data Figure 4 & EDT3

Load metadata

meta <- read.delim("metadata.tsv", sep = "\t") %>%
  filter(sample %in% names(con$samples)) %>%
  tibble::column_to_rownames("sample") %>% 
  mutate(sample = rownames(.))
  # dplyr::select(-condition)

4a

con_pd <- con$clone()
con_pd$embedding <- con$embeddings$UMAP

con_pd$samples <- con$samples %>% .[!grepl("MSA", names(.))]

sample.groups <- con_pd$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","PD")), .)

cao_pd <- Cacoa$new(data.object = con_pd, 
                 sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "PD") %>% 
                   setNames(names(sample.groups)), 
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "PD", 
                 n.cores = 32)

cao_pd$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_pd$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("MSA", sample)) %>% dplyr::select(-disease_duration, -subtype, -sample), 
                               space = "expression.shifts")
## Warning in cao_pd$estimateMetadataSeparation(sample.meta = meta %>%
## filter(!grepl("MSA", : Significant separation by: condition
cowplot::plot_grid(plotlist = list(
  cao_pd$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
  cao_pd$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_pd$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p)) %>%
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_pd$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 4) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p

4b

con_msa <- con$clone()
con_msa$embedding <- con$embeddings$UMAP

con_msa$samples <- con$samples %>% .[!grepl("PD", names(.))]

sample.groups <- con_msa$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("CTRL","MSA")), .)

cao_msa <- Cacoa$new(data.object = con_msa, 
                 sample.groups = ifelse(grepl("CTRL", sample.groups), "CTRL", "MSA") %>% 
                   setNames(names(sample.groups)), 
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "MSA", 
                 n.cores = 32)

cao_msa$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_msa$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("PD", sample)) %>% dplyr::select(-brain_bank, -sample), 
                               space = "expression.shifts")
cowplot::plot_grid(plotlist = list(
  cao_msa$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
    cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_msa$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
  cao_msa$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p)) %>%
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_pd$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 5) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p

4c

con_dis <- con$clone()
con_dis$embedding <- con$embeddings$UMAP

con_dis$samples <- con$samples %>% .[!grepl("CTRL", names(.))]

sample.groups <- con_dis$samples %>% 
  names() %>% 
  setNames(grepl.replace(., c("PD","MSA")), .)

cao_dis <- Cacoa$new(data.object = con_dis, 
                 sample.groups = ifelse(grepl("PD", sample.groups), "PD", "MSA") %>% 
                   setNames(names(sample.groups)), 
                 cell.groups = anno.major, 
                 ref.level = "PD", 
                 target.level = "MSA", 
                 n.cores = 32)

cao_dis$estimateExpressionShiftMagnitudes()
## Filtering data...
## Excluding cell types Exc. neurons that don't have enough samples
## done!
## Calculating pairwise distances using dist='cor'...
## Done!
cao_dis$estimateMetadataSeparation(sample.meta = meta %>% filter(!grepl("CTRL", sample)) %>% dplyr::select(-sample), 
                               space = "expression.shifts")
cowplot::plot_grid(plotlist = list(
  cao_dis$plotSampleDistances(space = "expression.shifts", show.sample.size = T, method = "UMAP", title = "Condition"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$age %>% setNames(rownames(meta)), title = "Age"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$brain_bank %>% setNames(rownames(meta)), show.sample.size = T, title = "Brain bank"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$disease_duration %>% setNames(rownames(meta)), show.sample.size = T, title = "Disease duration"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$extract %>% setNames(rownames(meta)), show.sample.size = T, title = "Extract batch"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$flowcell %>% setNames(rownames(meta)), show.sample.size = T, title = "Flowcell"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", show.sample.size = T, sample.colors = meta$pmi %>% setNames(rownames(meta)), title = "PMI"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$sex %>% setNames(rownames(meta)), show.sample.size = T, title = "Sex"),
  cao_dis$plotSampleDistances(space = "expression.shifts", method = "UMAP", sample.colors = meta$subtype %>% setNames(rownames(meta)), show.sample.size = T, title = "Subtype"),
  cao_dis$test.results$metadata.separation$padjust %>%
    data.frame(p = .) %>%
    mutate(pp = -log10(p)) %>%
    tibble::rownames_to_column("covariate") %>%
    ggplot(aes(covariate, pp)) +
    geom_bar(stat = "identity", fill = "lightblue4") +
    ylim(0, pmax(2, max(-log10(cao_pd$test.results$metadata.separation$padjust)))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
    labs(x = "", y = "-log10(adj. p)", title = "Association significance") +
    geom_hline(yintercept = -log10(0.05), colour = "red3")
), ncol = 5) -> p
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
## Warning in uwot(X = X, n_neighbors = n_neighbors, n_components = n_components,
## : n_components > number of columns in input data: 2 > 1, this may give poor or
## unexpected results
p

Extended Data Table 3

Not rerun

list(cao_msa, cao_pd, cao_dis) %>% 
  lapply(getElement, "test.results") %>% 
  lget("metadata.separation") %>%
  lapply(\(x) x[-1]) %>%
  Map(\(dat, nn) bind_cols(dat) %>% data.frame() %>%  `rownames<-`(nn), dat = ., nn = lapply(., lapply, names) %>% sget(1)) %>% 
  setNames(c("MSA", "PD", "dis")) %>% 
  lapply(tibble::rownames_to_column, var = "covariate") %>% 
  bind_rows(.id = "comparison") %>% 
  mutate(comparison = scHelper::grepl.replace(comparison, c("MSA", "PD", "dis"), c("CTRL vs MSA", "CTRL vs PD", "PD vs MSA"))) %>% 
  write.table("/work/msa/05_tables/Table SX - Cacoa covariate analysis.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Figure 5

To investigate effects from co-variates, we ran scITD on all cell types for each comparison. We don’t determine factors here as it takes a significant amount of time, but we keep the code in the first instance.

We tested different levels of var_scale_power, here we’re just showing var_scale_power = 0.5 as we didn’t find any effects except for OPCs. Per default, we set vargenes_method=“norm_var_pvals” and adjusted vargenes_thresh to obtain around 1,000 var. genes, but in some instances we took the top most variable genes instead.

Neurons, preparation

con.tmp <- qread("cao_neurons.qs", nthreads = 10)$data.object
anno <- qread("anno_neurons.qs")

anno %<>% 
  collapseAnnotation("GABA") %>%
  collapseAnnotation("GLU") %>%
  renameAnnotation("GABA", "Inh. neurons") %>%
  renameAnnotation("GLU", "Exc. neurons") %>%
  collapseAnnotation("MSN")
## Collapsing 11 labels containing 'GABA' in their name into one label.
## Collapsing 2 labels containing 'GLU' in their name into one label.
## Collapsing 3 labels containing 'MSN' in their name into one label.
cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.7,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1179
# get assistance with rank determination
itd_container %<>% determine_ranks_tucker(max_ranks_test=c(10,15),
                                          shuffle_level='cells',
                                          num_iter=10,
                                          norm_method='trim',
                                          scale_factor=10000,
                                          scale_var=TRUE,
                                          var_scale_power=0.5)

itd_container$plots$rank_determination_plot
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations="pval")

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var',
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 9 donors. All donors have at least 1 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=1,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 7 donors. All donors have at least 1 cells in each cell type included.
## Consider using fewer cell types or reducing the donor_min_cells parameter to include more donors.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1084
itd_container %<>% run_tucker_ica(ranks=c(5,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Astrocytes, preparation

con.tmp <- qread("con_astrocytes.qs", nthreads = 10)
anno <- qread("anno_astro.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
  .[, !grepl("MT-|RPS|RPL", colnames(.))] %>% 
  Matrix::t()

meta.all <- con.tmp$getDatasetPerCell() %>% 
  .[names(.) %in% names(anno)] %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 704
itd_container %<>% run_tucker_ica(ranks=c(5,8),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 22 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 871
itd_container %<>% run_tucker_ica(ranks=c(5,9),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 19 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 701
itd_container %<>% run_tucker_ica(ranks=c(6,7),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Microglia, preparation

con.tmp <- qread("cao_micro_pvm.qs", nthreads = 10)$data.object
anno <- qread("anno_micro_pvm.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 10 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1114
itd_container %<>% run_tucker_ica(ranks=c(3,7),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

factors <- c('sex','pmi', "age", "flowcell", "extract")

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test = factors, 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars= factors,
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1012
itd_container %<>% run_tucker_ica(ranks=c(4,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=3,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var', 
                               vargenes_thresh=500,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 14 donors. All donors have at least 3 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1001
itd_container %<>% run_tucker_ica(ranks=c(4,10),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

Oligodendrocytes, preparation

con.tmp <- qread("con_oligodendrocytes.qs", nthreads = 10)
anno <- qread("anno_oligo.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>%
  .[rownames(.) %in% names(anno), !grepl("MT-|RPS|RPL", colnames(.))] %>% 
  Matrix::t()

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw %>% .[, colnames(.) %in% :
## Cell type names cannot have special characters '.', '_', or ':'. Removing these
## characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.05,
                               scale_var = TRUE, 
                               var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1209
itd_container %<>% run_tucker_ica(ranks=c(6,9),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1505
itd_container %<>% run_tucker_ica(ranks=c(4,8),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)
## Warning in make_new_container(count_data = cm.raw, meta_data = meta, params =
## param_list, : Cell type names cannot have special characters '.', '_', or ':'.
## Removing these characters and concatenating name components.
itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.05,
                               scale_var = TRUE, 
                               var_scale_power = 1) # Diminishable effect at 0.5, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 1146
itd_container %<>% run_tucker_ica(ranks=c(5,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"), 
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

OPCs, preparation

con.tmp <- qread("con_opcs.qs", nthreads = 10)
anno <- qread("anno_opc.qs")

cm.raw.all <- con.tmp$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[!grepl("MT-|RPS|RPL", rownames(.)), ]

meta.all <- con.tmp$getDatasetPerCell() %>% 
  data.frame(donors = .) %>% 
  mutate(ctypes = anno[rownames(.)] %>% unname()) %>% 
  .[complete.cases(.), ]

metadata.all <- read.delim("metadata.tsv")

for (cc in colnames(metadata.all)[-1]) {
  meta.all[[cc]] <- metadata.all[[cc]][match(meta.all$donors, metadata.all$sample)]
}

# Convert integers to numeric
meta.all$age %<>% as.numeric()
meta.all$disease_duration %<>% as.numeric()

CTRL vs MSA

meta <- meta.all %>% 
  filter(condition != "PD") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "PD")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw %>% .[, colnames(.) %in% rownames(meta)], 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=0.2,
                               scale_var = TRUE, 
                               var_scale_power = 0.5) # Diminishable effect at 2, but not on any other level
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 17 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 778
itd_container %<>% run_tucker_ica(ranks=c(4,5),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "flowcell", "extract"), # Omitting brain bank
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

CTRL vs PD

meta <- meta.all %>% 
  filter(condition != "MSA") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "MSA")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.4,
                               scale_var = TRUE, 
                               var_scale_power = 1) # No effect at 0.5, but 1, 1.5, 2
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 21 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 833
itd_container %<>% run_tucker_ica(ranks=c(4,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

PD vs MSA

meta <- meta.all %>% 
  filter(condition != "CTRL") %>% 
  mutate(donors = factor(donors),
         ctypes = factor(ctypes))

metadata <- metadata.all %>% 
  filter(condition != "CTRL")

cm.raw <- cm.raw.all %>% 
  .[, colnames(.) %in% rownames(meta)]

# set up project parameters
param_list <- initialize_params(ctypes_use = meta$ctypes %>% levels(),
                                ncores = 31, 
                                rand_seed = 10)

# create project container
itd_container <- make_new_container(count_data=cm.raw, 
                                    meta_data=meta,
                                    params=param_list,
                                    label_donor_sex = F)

itd_container %<>% form_tensor(donor_min_cells=5,
                               norm_method='trim', 
                               scale_factor=10000,
                               vargenes_method='norm_var_pvals', 
                               vargenes_thresh=.4,
                               scale_var = TRUE, 
                               var_scale_power = 0.5)
## parsing data matrix by cell/tissue type...
## cleaning data...
## Keeping 18 donors. All donors have at least 5 cells in each cell type included.
## collapsing count matrices from cells to donors (aka pseudobulk operation)...
## normalizing data...
## calculating gene overdispersion factors...
## selecting highly variable genes from each cell type...
## scaling variance...
## forming tensor...
## Complete!
print(length(itd_container[["all_vargenes"]]))
## [1] 617
itd_container %<>% run_tucker_ica(ranks=c(5,6),
                                  tucker_type = 'regular', 
                                  rotation_type = 'hybrid')

# get donor scores-metadata associations
itd_container %<>% get_meta_associations(vars_test=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                         stat_use='pval')

# plot donor scores
itd_container %<>% plot_donor_matrix(meta_vars=c('sex','pmi', "age", "brain_bank", "flowcell", "extract"),
                                     show_donor_ids = TRUE,
                                     add_meta_associations='pval')

# show the donor scores heatmap
p <- itd_container$plots$donor_matrix

p

##              used   (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   18310929  978.0   50794518  2712.8   50794518  2712.8
## Vcells 1087515368 8297.1 3851064181 29381.3 8457159004 64523.1

Extended Data Figure 6

For panels a-d, see scCODA.ipynb.

Load data

anno.major <- qread("anno_major.qs")
meta <- read.delim("metadata.tsv", sep = "\t")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

6e

# Minor final
anno.minor <- anno.major[!anno.major %in% c("PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

df <- data.frame(cid = names(anno.minor), anno = unname(anno.minor)) %>% 
  mutate(sample = strsplit(cid, "!!") %>% sget(1)) %>% 
  group_by(sample, anno) %>% 
  summarize(n = n()) %>% 
  mutate(prop = n / sum(n) * 100) %>% 
  ungroup() %>% 
  filter(anno %in% c("Inh. neurons", "Exc. neurons", "MSN")) %>% 
  mutate(age = meta$age[match(sample, meta$sample)]) %>% 
  mutate(age_bin = cut(age, breaks = c(0, 60, 70, 80, 90), labels = c("0-60", "60-70", "70-80", "80-90")),
         condition = grepl.replace(sample, c("CTRL", "MSA", "PD")))
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
df %>% 
  ggplot(aes(age_bin, prop)) + 
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.85)) + 
  theme_bw() +
  facet_wrap(~ anno) -> p

p

Statistics

df %>% 
  group_by(anno) %>%
  rstatix::kruskal_test(prop ~ age_bin) %>% 
  mutate(padj = p.adjust(p, method = "BH"))
## # A tibble: 3 × 8
##   anno         .y.       n statistic    df     p method          padj
##   <fct>        <chr> <int>     <dbl> <int> <dbl> <chr>          <dbl>
## 1 Exc. neurons prop     15     0.788     3 0.852 Kruskal-Wallis 0.852
## 2 Inh. neurons prop     24     5.23      3 0.156 Kruskal-Wallis 0.376
## 3 MSN          prop     24     4.10      3 0.251 Kruskal-Wallis 0.376

6f

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "MSN", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

df <- data.frame(cid = names(anno.minor), anno = unname(anno.minor)) %>% 
  mutate(sample = strsplit(cid, "!!") %>% sget(1)) %>% 
  group_by(sample, anno) %>% 
  summarize(n = n()) %>% 
  mutate(prop = n / sum(n) * 100) %>% 
  ungroup() %>% 
  filter(anno %in% levels(anno.micro)) %>% 
  mutate(age = meta$age[match(sample, meta$sample)]) %>% 
  mutate(age_bin = cut(age, breaks = c(0, 60, 70, 80, 90), labels = c("0-60", "60-70", "70-80", "80-90")),
         condition = grepl.replace(sample, c("CTRL", "MSA", "PD")))
## `summarise()` has grouped output by 'sample'. You can override using the
## `.groups` argument.
df %>% 
  ggplot(aes(age_bin, prop)) + 
  geom_boxplot() +
  geom_point(position = position_dodge(width = 0.85)) + 
  theme_bw() +
  facet_wrap(~ anno) -> p

p

Statistics

df %>% 
  group_by(anno) %>%
  rstatix::kruskal_test(prop ~ age_bin) %>% 
  mutate(padj = p.adjust(p, method = "BH"))
## # A tibble: 5 × 8
##   anno              .y.       n statistic    df      p method          padj
##   <fct>             <chr> <int>     <dbl> <int>  <dbl> <chr>          <dbl>
## 1 MIC_activated     prop     29      7.58     3 0.0556 Kruskal-Wallis 0.139
## 2 MIC_intermediate1 prop     29      4.58     3 0.206  Kruskal-Wallis 0.258
## 3 MIC_intermediate2 prop     29      8.94     3 0.0302 Kruskal-Wallis 0.139
## 4 MIC_steady-state  prop     29      2.78     3 0.427  Kruskal-Wallis 0.427
## 5 PVMs              prop     29      5.13     3 0.163  Kruskal-Wallis 0.258

Extended Data Figure 7

We’re calculating cosine similarities to annotated data from Garma et al.. The data are available here.

Prepare data

Here, we’re showing how we prepared the data. This will not be run again.

lp.raw <- loomR::connect("Interneurons_raw.loom", mode = "r+", skip.validate = T)

cm.raw <- lp.raw[["matrix"]][,] %>% 
  `dimnames<-`(list(
    lp.raw[["col_attrs/obs_names"]][], 
    lp.raw[["row_attrs/var_names"]][]
    ))

lp.raw$close()

lp.norm <- loomR::connect("Interneurons_processed.loom", mode = "r+", skip.validate = T)

anno <- setNames(
  lp.norm$col.attrs$`IN subclass`[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()
  
spc <- setNames(
  lp.norm$col.attrs$sample[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

origin <- setNames(
  lp.norm$col.attrs$Origin[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

area <- setNames(
  lp.norm$col.attrs$Region[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

sex <- setNames(
  lp.norm$col.attrs$Sex[],
  lp.norm$col.attrs$obs_names[]
) %>% 
  factor()

umap <- `rownames<-`(lp.norm$col.attrs$X_umap[,] %>% t(),
                     lp.norm$col.attrs$obs_names[])

lp.norm$close()

Then, we need to prepare a Conos object of those data. This will not be run here.

preprocessed <- spc %>%
  split(names(.), .) %>%
  .[sapply(., length) > 50] %>%
  lapply(\(cid) cm.raw[cid, ]) %>%
  lapply(Matrix::t) %>%
  lapply(basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)

con <- Conos$new(preprocessed, n.cores = 32)
con$embeddings$PCA$UMAP <- umap
con$embedding <- umap
con$clusters$leiden$groups <- anno
con$clusters$sex$groups <- sex
con$clusters$area$groups <- area
con$clusters$origin$groups <- origin
con$clusters$spc$groups <- spc

qsave(con, "con_garma.qs", nthreads = 10)

7a

# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>% 
  factor(labels = paste("Rydbirk", levels(.), sep = "-"))

rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
  sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

rydbirk.neurons.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("-") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")), 
                                 design = ~ 1) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

# Garma data
con.garma <- qread("con_garma.qs", nthreads = 10)

garma.neurons.anno <- con.garma$clusters$anno$groups %>% 
  factor() %>% 
  factor(labels = paste("Garma", levels(.), sep = "-"))

garma.neurons.pseudo <- con.garma$getJointCountMatrix(raw = T) %>% 
  sccore::collapseCellsByType(groups = garma.neurons.anno, min.cell.count = 1) %>%
  t() %>%
  apply(2, as, "integer")

garma.neurons.pseudo %<>%
  DESeq2::DESeqDataSetFromMatrix(.,
                                 colnames(.) %>%
                                   strsplit("_") %>%
                                   sget(1) %>%
                                   data.frame() %>%
                                   `dimnames<-`(list(colnames(garma.neurons.pseudo), "group")),
                                 design = ~ 1) %>%
  DESeq2::estimateSizeFactors() %>%
  DESeq2::counts(normalized = T)

# We rotate Garma into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
cm.garma <- garma.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)

genesToKeep <- conos:::getOdGenesUniformly(append(con.garma$samples, rydbirk.neurons$samples), 50) %>%
  intersect(rownames(cm.rydbirk)) %>% 
  intersect(rownames(cm.garma))

pc.res <-
  cm.rydbirk[genesToKeep, ] %>%
  t() %>%
  prcomp(center = T,
         scale = T)

pc.tmp <- cm.garma[genesToKeep, ] %>%
  as.data.frame() %>%
  filter(rowSums(.) > 0) %>%
  t() %>%
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Garma") %>% factor(),
         anno = strsplit(id, "-") %>% sget(2))

lsa::cosine(dat.plot %>% 
              select(-id, -study, -anno) %>%
              t()) %>%
  reshape2::melt() %>% 
  ggplot(aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Top 50 OD genes, Garma PCA rotation into Rydbirk PCA space")

7b

cm.garma.raw <- con.garma$getJointCountMatrix(raw = T)
cm.rydbirk.raw <- rydbirk.neurons$getJointCountMatrix(raw = T)

idx <- intersect(colnames(cm.garma.raw), colnames(cm.rydbirk.raw))

cm.garma.ext <- cm.garma.raw %>% 
  sccore:::extendMatrix(idx)
cm.rydbirk.ext <- cm.rydbirk.raw %>% 
  sccore:::extendMatrix(idx)

cm.comb <- rbind(cm.garma.ext, cm.rydbirk.ext)
anno.comb <- c(garma.neurons.anno %>% .[names(.) %in% rownames(cm.comb)], rydbirk.neurons.anno %>% .[names(.) %in% rownames(cm.comb)]) %>% 
  factor()

cm.comb %<>% .[rownames(.) %in% names(anno.comb), ]

unique(
  c("GAD1","GAD2","PPP1R1B","SLC17A7","CHAT","CALB2","DCC","ID2","MEIS2","ST18","NKX2-1","NR2F2","PAX6","PVALB","RXFP1","SST","VIP","RELN","SATB2","DRD1","DRD2","FOXP2", "LRP8", "VLDLR") %>% 
  c("CCK", "VIP", "CXCL14", "CHAT", "PTHLH", "MOXD1", "CHST9", "TAC3", "SST", "NPY", "PVALB", "GRIK3", "DACH1", "GAD1", "GAD2")
) %>% 
  sccore::dotPlot(cm.comb, anno.comb) + scale_color_gradient(low = "grey80", high = "firebrick")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   19653177  1049.6   50794518  2712.8   50794518  2712.8
## Vcells 1511024602 11528.3 3851075027 29381.4 8457159004 64523.1

Extended Data Figure 8

Prepare data

We’re calculating cosine similarities to annotated data from Kamath et al.. The data are available here.

cm <- Matrix::readMM("GSE178265_Homo_matrix.mtx") %>% 
  `dimnames<-`(list(
    read.delim("GSE178265_Homo_features.tsv", header = F)$V1,
    read.delim("GSE178265_Homo_bcd.tsv", header = F)$V1
  ))

metadata.raw <- read.delim("METADATA_PD.tsv", header = T)[-1, ]

cells.to.omit <- metadata.raw %>% 
  filter(donor_id %in% c("NIH1", "Rat1", "TreeShrew1")) %>% 
  pull(NAME)

meta.per.cell <- metadata.raw %>% 
  filter(!NAME %in% cells.to.omit)

meta.per.sample <- metadata.raw %>% 
  filter(!NAME %in% cells.to.omit) %>% 
  select(-NAME, -libname, -biosample_id) %>% 
  mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>% 
  mutate(donor_area = paste(donor_id, area, sep = "-")) %>% 
  .[match(unique(.$donor_area), .$donor_area), ]

cids.per.donor.area <- meta.per.cell %>% 
  mutate(area = grepl.replace(organ__ontology_label, c("substantia nigra pars compacta", "caudate nucleus"), c("SN", "CN"))) %>% 
  mutate(donor_area = paste(donor_id, area, sep = "-")) %$% 
  split(NAME, donor_area)

cm.list <- meta %>%
  sccore::plapply(\(mm) cm[, colnames(cm) %in% rownames(mm)], n.cores = 7)

cm.area.donor <- cm.list %>%
  lapply(\(area) sccore::plapply(cids.per.donor.area, \(cid) area[, colnames(area) %in% cid], n.cores = 22)) %>%
  lapply(\(cms) cms[sapply(cms, ncol) > 30]) %>%
  lapply(lapply, as, "CsparseMatrix") %>%
  lapply(lapply, basicP2proc, get.largevis = F, get.tsne = F, make.geneknn = F, n.cores = 20)
# Annotations
embeddings <- dir(pattern = "UMAP")

embeddings %<>% 
  lapply(\(type) read.delim(type, header = T, row.names = 1)) %>% 
  setNames(embeddings %>% strsplit("_") %>% sget(1)) %>% 
  lapply(dplyr::rename, subtype = Cell_Type)

con.list <- names(embeddings) %>% 
  lapply(\(nn) {
    con <- Conos$new(cm.area.donor[[nn]], n.cores = 32)
    con$embeddings <- list(UMAP = embeddings[[nn]][, -3])
    con$embedding <- con$embeddings[[1]]
    con$clusters <- list(leiden = list(groups = embeddings[[nn]] %>% {setNames(pull(., subtype), rownames(.))}))
    
    return(con)
  }) %>% 
  setNames(names(embeddings))
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   22127460  1181.8   50794518  2712.8   50794518  2712.8
## Vcells 6138075275 46829.8 8124742250 61986.9 8457159004 64523.1

Plot

# Our data
rydbirk.neurons.anno <- qread("anno_neurons.qs") %>% 
  factor(labels = paste("Rydbirk", levels(.), sep = "-"))

rydbirk.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

rydbirk.neurons.pseudo <- rydbirk.neurons$getJointCountMatrix(raw = T) %>%
  sccore::collapseCellsByType(groups = rydbirk.neurons.anno, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

rydbirk.neurons.pseudo %<>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("-") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(rydbirk.neurons.pseudo), "group")), 
                                 design = ~ 1) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)

# Kamath data
kamath.neurons.anno1 <- con.list$da$clusters$leiden$groups %>% 
  factor() %>% 
  factor(labels = paste("Kamath", levels(.), sep = "-"))

kamath.neurons.anno2 <- con.list$nonda$clusters$leiden$groups %>% 
  factor() %>% 
  factor(labels = paste("Kamath", levels(.), sep = "-"))

kamath.neurons.anno <- factor(c(kamath.neurons.anno1, kamath.neurons.anno2))


kamath.cm1 <- con.list$da$getJointCountMatrix(raw = T)
kamath.cm2 <- con.list$nonda$getJointCountMatrix(raw = T)

kamath.neurons.pseudo <- rbind(kamath.cm1, kamath.cm2) %>%
  sccore::collapseCellsByType(groups = kamath.neurons.anno, min.cell.count = 1) %>%
  t() %>%
  apply(2, as, "integer")

kamath.neurons.pseudo %<>%
  DESeq2::DESeqDataSetFromMatrix(.,
                                 colnames(.) %>%
                                   strsplit("_") %>%
                                   sget(1) %>%
                                   data.frame() %>%
                                   `dimnames<-`(list(colnames(kamath.neurons.pseudo), "group")),
                                 design = ~ 1) %>%
  DESeq2::estimateSizeFactors() %>%
  DESeq2::counts(normalized = T)

# We rotate Kamath into Rydbirk sample PCA space
cm.rydbirk <- rydbirk.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
cm.kamath <- kamath.neurons.pseudo %>% 
  as.data.frame() %>%
  filter(rowSums(.) > 0)
genesToKeep <- conos:::getOdGenesUniformly(append(con.list$da$samples, rydbirk.neurons$samples) %>% append(con.list$nonda$samples), 100) %>% 
  intersect(rownames(cm.rydbirk)) %>% 
  intersect(rownames(cm.kamath))

pc.res <-
  cm.rydbirk[genesToKeep, ] %>%
  t() %>%
  prcomp(center = T,
         scale = T)

pc.tmp <- cm.kamath[genesToKeep, ] %>%
  as.data.frame() %>%
  filter(rowSums(.) > 0) %>%
  t() %>%
  scale(pc.res$center, pc.res$scale) %*% pc.res$rotation

# Plot
dat.plot <- rbind(pc.res$x, pc.tmp) %>%
  data.frame() %>% 
  mutate(id = rownames(.)) %>% 
  mutate(study = ifelse(grepl("Rydbirk", id), "Rydbirk", "Kamath") %>% factor(),
         anno = strsplit(id, "-") %>% sget(2))

lsa::cosine(dat.plot %>% 
              select(-id, -study, -anno) %>% 
              t()) %>%
  reshape2::melt() %>% 
  ggplot(aes(Var1, Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(low = "skyblue3", mid = "white", high = "deeppink3") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
  labs(title = "Top 100 OD genes, Kamath PCA rotation into Rydbirk PCA space")

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   20802381  1111.0    50794518  2712.8    50794518  2712.8
## Vcells 5947788236 45378.1 11699804840 89262.5 11285321700 86100.2

Extended Data Figure 9

Here, we use public data from Smajic et al.. We isolated neurons and used the available annotation (IPDCO_hg_midbrain_cell.tsv).

con.smajic <- qread("smajic_neurons.qs", nthreads = 10)
con.neurons <- qread("cao_neurons.qs", nthreads = 10)$data.object

# Get Smajic annotation
tmp <- data.frame(cid = rownames(con.smajic$embedding)) %>% 
  mutate(barcode = strsplit(cid, "_") %>% 
           sget(3))

anno <- data.table::fread("IPDCO_hg_midbrain_cell.tsv", header = T) %>% 
  mutate(barcode = strsplit(barcode, "_") %>% 
           sget(1)) %>% 
  mutate(cid = tmp$cid[match(barcode, tmp$barcode)]) %>% 
  filter(!is.na(cid)) %>% 
  pull(cell_ontology, cid) %>%
  .[!. %in% c("Astrocytes", "Endothelial cells", "Ependymal", "Microglia", "Oligodendrocytes", "OPCs", "Pericytes")] %>% 
  factor()
  
cowplot::plot_grid(plotlist = list(
  con.smajic$plotGraph(groups = anno, title = "Smajic neurons, annotation", font.size = 3, shuffle.colors = T, plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(groups = anno.neurons, title = "Rydbirk neurons, annotation", font.size = 3) + theme(line = element_blank()),
  con.smajic$plotGraph(groups = anno, gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(gene = "RELN", title = "RELN expression", plot.na = F) + theme(line = element_blank()),
  con.smajic$plotGraph(groups = anno, gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank()),
  con.neurons$plotGraph(gene = "CADPS2", title = "CADPS2 expression", plot.na = F) + theme(line = element_blank())), 
  nrow = 3)

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   20843442  1113.2    50794518  2712.8    50794518  2712.8
## Vcells 5948619749 45384.4 11699804840 89262.5 11285321700 86100.2

Extended Data Figure 10

These plots were created with results from LIANA in Python.

Extended Data Figure 11

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.major)]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>% 
  .[!is.na(.)] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP", "TPPP") %>% 
  {Map(\(gene, leg) plotGenePseudoBulk(gene, cm.pseudo, leg), gene = ., leg = c(rep(F, 17), T))} %>% 
  cowplot::plot_grid(plotlist = ., ncol = 3)
## Warning in stat_compare_means(aes(fill = condition), method = "kruskal", : Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill
## Ignoring unknown aesthetics: fill

res.old <- c("TSPO", "COQ2", "MAPT", "FBXO47", "ELOVL7", "EDN1", "GAB1", "TENM2", "RABGEF1", "PLA2G4C", "INPP4B", "ZIC1", "ZIC2", "ZIC3", "ZIC4", "SNCA", "TPPP") %>% 
  lapply(\(gene) {
    idx <- cm.pseudo %>% 
    colnames() %>% 
    data.frame(id = .) %>% 
    mutate(condition = strsplit(id, "_|!!") %>% sget(1),
           ct = strsplit(id, "!!") %>% sget(2)) %>% 
    mutate(ord = order(condition, ct))
  
  x <- cm.pseudo %>% 
    .[match(gene, rownames(.)), match(colnames(na.omit(.)), colnames(.))] %>%
    .[idx$ord]
  
  plot.dat <- x %>% 
    {data.frame(sample = names(.), 
                value = unname(.))} %>%
    mutate(anno = strsplit(sample, "!!") %>% 
             sget(2),
           condition = strsplit(sample, "!!|_") %>% 
             sget(1)) %>%
    mutate(anno = factor(anno))
  
  stat.test <- plot.dat %>% 
    group_by(anno) %>% 
    rstatix::wilcox_test(value ~ condition)
  
  stat.omnibus <- plot.dat %>% 
    group_by(anno) %>% 
    rstatix::kruskal_test(value ~ condition)
  
  out <- data.frame(Gene = gene, celltype = stat.test$anno, omnibus.old = stat.omnibus$p, contrast = sapply(seq(nrow(stat.test)), \(x) paste(stat.test$group1[x], stat.test$group2[x], sep = " - ")), padj.old = stat.test$p.adj)
  
  return(out)
  }) %>% 
  bind_rows()

Extended Data Figures 12 & 13

Load and prepare data

cao <- qread("cao_micro_pvm.qs", nthreads = 10)

anno.sort <- anno.micro[rownames(cao$data.object$embedding)]
anno.sel <- anno.sort[!anno.sort %in% c("MIC_intermediate1", "PVMs")] %>% factor()

emb <- cao$data.object$embedding %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  .[rownames(.) %in% names(anno.sel), ]

sds_obj <- slingshot(emb, 
                     anno.sel, 
                     start.clus = "MIC_steady-state",
                     stretch = 0
)

sds <- as.SlingshotDataSet(sds_obj)

pseudotime <- sds_obj@assays@data@listData$pseudotime[, 1]

12a

plot.df <- cao$data.object$embedding %>% 
  as.data.frame() %>% 
  .[rownames(.) %in% names(sds_obj), ] %>% 
  `colnames<-`(c("UMAP1","UMAP2")) %>% 
  mutate(., annotation = anno.micro[rownames(.)] %>% as.factor()) %>% 
  .[complete.cases(.),]

ldata <- getTscanTrajectory(cao$data.object, anno.sel)

cao$data.object$embedding %>% 
  as.data.frame() %>% 
  mutate(., unname(anno.micro[rownames(.)])) %>% 
  setNames(c("UMAP1", "UMAP2", "annotation")) %>% 
  mutate(., pseudotime = pseudotime[rownames(.)]) %>%
  ggplot() +
  geom_point(aes(UMAP1, UMAP2, col = pseudotime), size = 0.3) +
  geom_line(data = ldata1, aes(UMAP1, UMAP2, group = edge), size = 1) +
  theme_bw() +
  theme(line = element_blank()) +
  scale_color_gradient(low = "navyblue", high = "orange") +
  labs(col = "Pseudotime", x = "UMAP1", y = "UMAP2")# +

12b

Here, we show the code for running the generalized additive mixed model. It takes a significant amount of time to run, we provide data in pseudotime_micro_l1.qs.

mat <- cao$data.object$getJointCountMatrix() %>%
  .[match(names(sds_obj), rownames(.)), colSums(.) > 2e2] %>% 
  .[, !grepl(pattern = "RPL|RPS|MT-", colnames(.))]

mt <- mat %>%
  as.matrix() %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  {message("Mutating"); mutate(.,
                               pseudotime = pseudotime[rowname],
                               condition = cao$data.object$getDatasetPerCell()[rowname] %>%
                                 as.character() %>%
                                 strsplit("_") %>%
                                 sget(1),
                               sample = strsplit(rowname, "!!") %>%
                                 sget(1))} %>%
  .[complete.cases(.), ] %>%
  {message("Melting"); reshape2::melt(., id.vars = c("rowname", "pseudotime", "condition", "sample"))} %$%
  {message("Splitting"); split(., variable)}
res <- mt %>% 
  sccore::plapply(\(x) {
    fit_full <- gamm4(data = x, formula = value ~ s(pseudotime, by = factor(condition)), random = ~(1 | sample), REML = F)
    fit_reduced <- gamm4(data = x, formula = value ~ s(pseudotime), random = ~(1 | sample), REML = F)
    fit_none <- gamm4(data = x, formula = value ~ 1, random = ~(1 | sample), REML = F)
    
    ann <- anova(fit_full$mer, fit_reduced$mer, fit_none$mer)
    
    if (any(ann$`Pr(>Chisq)` <= 0.05, na.rm = T)) {
      residuals <- predict(fit_full$gam, se.fit = T)$fit %>% 
        unname()
      
      r.sq <- c(summary(fit_full$gam)$r.sq, summary(fit_reduced$gam)$r.sq) %>% 
        setNames(c("full", "reduced"))
      
      out <- list(anova = ann,
                  residuals = residuals,
                  r.sq = r.sq)
      
      return(out)
    }
  }, n.cores = 40, mc.preschedule = T, mc.cleanup = T, progress = F) %>%
  .[!sapply(., is.null)]

qsave(res, "pseudotime_micro_l1.qs")
res <- qread("pseudotime_micro_l1.qs")

rsq.pseudo <- res %>% 
  sapply(\(gene) gene$r.sq[1]) %>% 
  sort(decreasing = T)

res.filter <- res[names(rsq.pseudo)[seq(50)] %>% strsplit(".", fixed = T) %>% sget(1)]

cm.merged <- cao$data.object$getJointCountMatrix() %>% 
  .[match(names(pseudotime), rownames(.)), colnames(.) %in% names(res.filter)] %>% 
  .[rowSums(.) > 0, ]

## Predict smoothend expression 
pseudotime.both <- pseudotime[rownames(cm.merged)]
weights.both <- sds_obj@assays@data@listData$weights %>% .[match(names(pseudotime.both), rownames(.)), 1] + 1E-7

scFit <- cm.merged %>% 
  Matrix::t() %>% 
  tradeSeq::fitGAM(pseudotime = pseudotime.both, cellWeights = weights.both, verbose = T)
## Warning in max(abs(logR)): no non-missing arguments to max; returning -Inf
Smooth <- tradeSeq::predictSmooth(scFit, gene = colnames(cm.merged), tidy = F, n=100)

# Average across replicates and scale
Smooth <- t(scale(t(Smooth)))

# Seriate the results
Smooth <- Smooth[ seriation::get_order(seriation::seriate(Smooth, method="PCA_angle")), ]

# Create heatmap
col_fun = circlize::colorRamp2(c(-4, 0, 4), c("navy", "white", "firebrick"))

Heatmap(Smooth, 
        col = col_fun,
        cluster_columns=F, 
        cluster_rows=F, 
        show_column_names = F, 
        row_names_gp = grid::gpar(fontsize = 5))

13

cpc <- pseudotime %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  factor()

res.filter %>% 
  lget("residuals") %>% 
  lapply(as.data.frame) %>% 
  lapply(setNames, "residuals") %>% 
  lapply(mutate, group = cpc, pseudotime = pseudotime) %>% 
  data.table::rbindlist(idcol = "gene") %>% 
  ggplot(aes(pseudotime, residuals, col = group)) +
  geom_smooth() +
  theme_bw() +
  facet_wrap(~gene, ncol = 5, scales = "free")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

Extended Data Figure 15

Here, we use public data from Feleke et al.. We isolated microglia based on CSF1R expression and performed a quick annotation using AIF1 expression to identify activated microglia.

con.micro <- qread("feleke_micro.qs")

anno.micro <- getConosCluster(con.micro) %>% 
  factor(labels = c("Cluster1", "Activated", "Cluster2", "Cluster2"))

# We set sample groups manually as it can't be inferred from sample names
sample.groups <- c("CTRL", "CTRL", "DLB", "DLB", "DLB", "DLB", "DLB", "PDD", "PDD", "PD", "PDD", "PD", "PDD", "PD", "PDD", "PDD", "DLB", "PD", "PDD", "PD", "DLB", "PD", "PD", "CTRL", "CTRL", "CTRL", "CTRL", "CTRL") %>% 
  setNames(names(con.micro$samples))
con.clone <- con.micro$clone()
con.clone$samples <- con.micro$samples %>% .[which(sample.groups %in% c("CTRL", "PD"))]

sample.groups.clone <- sample.groups %>% .[. %in% c("CTRL", "PD")]
cao <- Cacoa$new(con.clone, sample.groups = sample.groups.clone, ref.level = "CTRL", target.level = "PD", cell.groups = anno.micro, n.cores = 32)

p <- cao$plotCellGroupSizes() + theme(line = element_blank())
p

cao$estimateCellLoadings()
p <- cao$plotCellLoadings()
## Warning: Removed 651 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 651 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_text()`).
p

p <- con.clone$plotGraph(size = 0.3, groups = anno.micro, font.size = 5) + theme(line = element_blank())
p

p <- con.clone$plotGraph(gene = "AIF1", title = "AIF1", plot.na = F) + theme(line = element_blank())
p

##              used    (Mb)  gc trigger    (Mb)    max used    (Mb)
## Ncells   20915887  1117.1    50794518  2712.8    50794518  2712.8
## Vcells 5959861107 45470.2 11699804840 89262.5 11653179372 88906.8

Extended Data Figure 16

16a

We provide the results output from scHLAcount in scHLAcount.zip.

samples <- dir("scHLAcount/rydbirk/") %>% .[grepl(pattern = "_", .)]

cms <- lapply(samples, function(x) {
  labels <- read.delim(paste0("scHLAcount/rydbirk/",x,"/labels.tsv"), header = F) %>% .[,1]
  barcodes <- read.delim(paste0("scHLAcount/rydbirk/",x,"/barcodes.tsv"), header = F) %>% 
    .[,1] %>%
    sapply(function(y) paste0(x,"one_",y))
  
  mtx <- Matrix::readMM(paste0("scHLAcount/rydbirk/",x,"/count_matrix.mtx")) %>% 
    t() %>%
    `dimnames<-`(list(barcodes, labels)) %>%
    .[rowSums(.) > 0,] # Remove cells with no counts
  
  tmp <- colnames(mtx) %>%
    strsplit("*", T) %>%
    sapply(`[[`, 1)
  
  # If more than one allele per HLA type has been detected, sum per HLA type
  if(any(tmp %>% table() %>% as.numeric() > 1)) {
    cs <- colSums(mtx)
    alleles <- unique(tmp)
    
    mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
      lapply(function(y) {
        if(class(y) == "dgTMatrix") rowSums(y) else y
      }) %>% 
      unlist() %>% 
      Matrix(nrow = nrow(mtx)) %>%
      `rownames<-`(rownames(mtx))
  }
  
  colnames(mtx) <- unique(tmp)
  return(mtx)
}) %>%
  setNames(samples)

Calculations

# Cell-wise
cell.counts <- sapply(cms, rowSums) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

depth <- getConosDepth(con) %>%
  .[match(names(cell.counts), names(.))] %>%
  {data.frame(cell = names(.),
              sample = names(.) %>% strsplit("!!", T) %>% sapply(`[[`, 1),
              depth = unname(.))}

depth[is.na(depth)] <- 0

cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD","MSA")) %>% as.factor()

mhc1.raw <- sapply(cms, function(x) {
  tmp <- x[,colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

mhc2.raw <- sapply(cms, function(x) {
  tmp <- x[,!colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>% 
  `names<-`(., gsub("one_","!!",names(.)))

cell.df <- data.frame(group = cell.group,
                      counts = cell.counts,
                      mhc1 = mhc1.raw,
                      mhc2 = mhc2.raw,
                      depth = depth$depth) %>%
  mutate(., anno = anno.major[match(rownames(.), names(anno.major))],
         sample = rownames(.) %>% sapply(strsplit, "!!", T) %>% sapply(`[[`, 1)) %>%
  mutate(type = grepl.replace(anno %>% as.character(), levels(anno), c("Brain","Brain","Brain","Peripheral","Peripheral","Peripheral","Brain","Brain","Brain","Brain"))) %>%
  `rownames<-`(names(cell.counts)) %>%
  filter(!is.na(anno)) 

Load Smajic data

samples <- dir("scHLAcount/smajic") %>% .[grepl(pattern = "SRR", .)]

cms <- lapply(samples, function(x) {
  labels <- read.delim(paste0("scHLAcount/smajic/",x,"/labels.tsv"), header = F) %>% .[,1]
  barcodes <- read.delim(paste0("scHLAcount/smajic/",x,"/barcodes.tsv"), header = F) %>% 
    .[,1] %>%
    sapply(function(y) paste0(x,"_",y,"_1"))
  mtx <- Matrix::readMM(paste0("scHLAcount/smajic/",x,"/count_matrix.mtx")) %>% 
    t() %>%
    `dimnames<-`(list(barcodes, labels)) %>%
    .[rowSums(.) > 0,] # Remove cells with no counts
  
  tmp <- colnames(mtx) %>%
    strsplit("*", T) %>%
    sapply(`[[`, 1)
  
  # If more than one allele per HLA type has been detected, sum per HLA type
  if(any(tmp %>% table() %>% as.numeric() > 1)) {
    cs <- colSums(mtx)
    alleles <- unique(tmp)
    
    mtx <- lapply(alleles, function(y) mtx[,tmp == y]) %>%
      lapply(function(y) {
        if(class(y) == "dgTMatrix") rowSums(y) else y
      }) %>% 
      unlist() %>% 
      Matrix(nrow = nrow(mtx)) %>%
      `rownames<-`(rownames(mtx))
  }
  
  colnames(mtx) <- unique(tmp)
  return(mtx)
}) %>%
  setNames(samples)

# Correct naming
name.df <- data.frame(samples = samples, ids = c(rep("PD",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = c(1,1,2:5)),
                                                 rep("CTRL",6) %>% mapply(function(x,y) paste0(x,y), x = ., y = 1:6)))

anno <- readRDS(paste0("scHLAcount/smajic/anno.sma.rds")) %>%
  setNames(., names(.) %>% 
             strsplit("_", T) %>% 
             lapply(function(x) {
               if(grepl("C", x[[1]])) x[[1]] %<>% gsub("C","CTRL", .)
               return(x)
             }) %>%
             sapply(function(x) paste0(x[[1]],"_",x[[2]])))

cms <- 1:12 %>% 
  lapply(function(x) {
    rnames <- cms[[x]] %>%
      rownames() %>%
      names() %>%
      sapply(function(y) paste0(name.df[x,2],"_",y)) %>%
      unname()
    
    rownames(cms[[x]]) <- rnames
    return(cms[[x]])
  }) %>%
  setNames(names(cms))

Calculations

cell.df.rydbirk <- cell.df

# Cell-wise
cell.counts <- sapply(cms, rowSums) %>% 
  unname() %>% 
  unlist()

rem <- cell.counts %>%
  names() %>%
  table() %>%
  .[. > 1] %>%
  names()

cell.counts %<>% .[!names(.) %in% rem]

cell.group <- grepl.replace(names(cell.counts), patterns = c("CTRL","PD")) %>% as.factor()

mhc1.raw <- sapply(cms, function(x) {
  tmp <- x[,colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>%
  .[!names(.) %in% rem]

mhc2.raw <- sapply(cms, function(x) {
  tmp <- x[,!colnames(x) %in% c("A","B","C")]
  if(class(tmp) != "numeric") rowSums(tmp) else tmp
}) %>% 
  unname() %>% 
  unlist() %>%
  .[!names(.) %in% rem]

cell.df <- data.frame(group = cell.group,
                      counts = cell.counts,
                      mhc1 = mhc1.raw,
                      mhc2 = mhc2.raw) %>%
  mutate(., anno = anno[match(rownames(.), names(anno))],
         sample = rownames(.) %>% sapply(strsplit, "_", T) %>% sapply(`[[`, 1)) %>%
  `rownames<-`(names(cell.counts)) %>%
  filter(!is.na(anno))

Integrate with our data and plot

cell.df.int <- rbind(cell.df.rydbirk %>% 
                       select(-c("depth","type")) %>%
                       mutate(origin = "Rydbirk"),
                     cell.df %>% 
                       mutate(anno = anno %>% factor(labels = c("Astrocytes","Exc. neurons","Inh. neurons","Endothelial","Eppendymal","Exc. neurons","Inh. neurons","Inh. neurons","Microglia","Oligodendrocytes","OPCs","Pericytes")),
                              origin = "Smajic")) %>%
  filter(! anno %in% c("Blood_immune","Endothelial","Eppendymal","Mural","Pericytes","Pericytes/endothelial","Immune")) %>%
  filter(!is.na(anno)) %>% 
  mutate(anno = anno %>% factor(),
         origin = origin %>% factor(),
         anno = anno %>% unname() %>% factor(),
         sample = sample %>% unname())

cell.anno.df <-
  cell.df.int %>%
  group_by(anno, sample, group, origin) %>%
  summarise(mhc1 = mean(mhc1),
            mhc2 = mean(mhc2),
            counts = mean(counts),
            cells = length(sample)) %>%
  as.data.frame() %>% 
  select(-cells, -counts, -sample)
## `summarise()` has grouped output by 'anno', 'sample', 'group'. You can override
## using the `.groups` argument.
p.text1 <- data.frame(signif = c("n.s.", "n.s.", "n.s.", "n.s.", "n.s.", "0.040", "0.032", "n.s."), 
                      mhc1 = 4.5,
                      anno = levels(cell.anno.df$anno))

p.text2 <- data.frame(signif = c("0.00057"), 
                      mhc2 = 8.5,
                      anno = "             Microglia")

cowplot::plot_grid(plotlist = list(
  ggplot(cell.anno.df, 
         aes(anno, 
             mhc1)) +
    geom_boxplot(outlier.shape = NA, 
                 aes(fill = group)) +
    geom_point(position = position_jitterdodge(jitter.width = 0.2), 
               aes(fill = group, 
                   col = origin, 
                   shape = group),
               size = 2) +
    scale_color_manual(values = c("black",
                                  "grey50")) +
    labs(x = "", 
         y = "Mean counts per sample", 
         title = "MHC-I counts", 
         fill = "", 
         col = "", 
         shape = " ") +
    geom_text(data = p.text1, aes(label = signif)) +
    theme_bw() +
    scale_fill_manual(values = pal.major) +
    theme(legend.position = "none", 
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          line = element_blank()),
  ggplot(cell.anno.df %>% filter(anno == "Microglia") %>% mutate(anno = "             Microglia"), aes(anno, mhc2)) +
    geom_boxplot(outlier.shape = NA, aes(fill = group)) +
    geom_point(position = position_jitterdodge(jitter.width = 0.2), aes(fill = group, col = origin, shape = group), size = 2) +
    scale_color_manual(values = c("black","grey50")) +
    labs(x = "", y = "", title = "MHC-II counts", fill = "", col = "", shape = " ") +
    geom_text(data = p.text2, aes(label = signif)) +
    theme_bw() +
    scale_fill_manual(values = pal.major) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
          line = element_blank())
), ncol = 2, rel_widths = c(2,0.9))

Kruskal-Wallis, asses differences per cell type

cell.anno.df %>% 
  group_by(anno) %>% 
  kruskal_test(mhc1 ~ group) %>% 
  data.frame()
##               anno  .y.  n statistic df      p         method
## 1       Astrocytes mhc1 38 5.4099627  2 0.0669 Kruskal-Wallis
## 2        Microglia mhc1 37 3.5900071  2 0.1660 Kruskal-Wallis
## 3 Oligodendrocytes mhc1 38 4.0883941  2 0.1290 Kruskal-Wallis
## 4             PVMs mhc1 24 3.5403947  2 0.1700 Kruskal-Wallis
## 5             OPCs mhc1 38 0.9689609  2 0.6160 Kruskal-Wallis
## 6     Inh. neurons mhc1 31 6.4335165  2 0.0401 Kruskal-Wallis
## 7     Exc. neurons mhc1 22 6.8860651  2 0.0320 Kruskal-Wallis
## 8              MSN mhc1 21 4.9345432  2 0.0848 Kruskal-Wallis
cell.anno.df %>% 
  filter(anno == "Microglia") %>% 
  group_by(anno) %>% 
  kruskal_test(mhc2 ~ group) %>% 
  data.frame()
##        anno  .y.  n statistic df        p         method
## 1 Microglia mhc2 37  14.92493  2 0.000574 Kruskal-Wallis

Wilcoxon, assess differences between our data and Smajic per condition per cell type

cell.anno.df %>% 
  filter(group != "MSA", 
         !anno %in% c("MSN","PVMs")) %>%
  group_by(anno, group) %>% 
  wilcox_test(mhc1 ~ origin) %>% 
  data.frame()
##                anno group  .y.  group1 group2 n1 n2 statistic     p
## 1        Astrocytes  CTRL mhc1 Rydbirk Smajic 10  6        27 0.792
## 2        Astrocytes    PD mhc1 Rydbirk Smajic 11  5        26 0.913
## 3         Microglia  CTRL mhc1 Rydbirk Smajic  9  6        21 0.529
## 4         Microglia    PD mhc1 Rydbirk Smajic 11  5        30 0.827
## 5  Oligodendrocytes  CTRL mhc1 Rydbirk Smajic 10  6        34 0.713
## 6  Oligodendrocytes    PD mhc1 Rydbirk Smajic 11  5        26 0.913
## 7              OPCs  CTRL mhc1 Rydbirk Smajic 10  6        15 0.118
## 8              OPCs    PD mhc1 Rydbirk Smajic 11  5        31 0.743
## 9      Inh. neurons  CTRL mhc1 Rydbirk Smajic  8  6        10 0.080
## 10     Inh. neurons    PD mhc1 Rydbirk Smajic  7  5        23 0.432
## 11     Exc. neurons  CTRL mhc1 Rydbirk Smajic  6  6        10 0.240
## 12     Exc. neurons    PD mhc1 Rydbirk Smajic  3  5         5 0.549
cell.anno.df %>% 
  filter(group != "MSA", 
         anno == "Microglia") %>%
  group_by(anno, group) %>% 
  wilcox_test(mhc2 ~ origin) %>% 
  data.frame()
##        anno group  .y.  group1 group2 n1 n2 statistic     p
## 1 Microglia  CTRL mhc2 Rydbirk Smajic  9  6        19 0.368
## 2 Microglia    PD mhc2 Rydbirk Smajic 11  5        40 0.180

16b

We provide a curated list with MHC and cytokine genes. Please note, the order of clusters may change.

dat <- read.table("MHC_cytokines_curated.tsv", header = T, sep = "\t")

cm.merged <- con$getJointCountMatrix(raw = T) %>% 
  Matrix::t() %>% 
  .[, colnames(.) %in% names(anno.major)] %>% 
  .[rownames(.) %in% (dat$name[dat$class == "cytokine"] %>% gsub("-","",.)), ] %>% 
  .[rowSums(.) > 0,]

# Create sample-wise annotation
anno.donor <- con$getDatasetPerCell()[colnames(cm.merged)]
anno.subtype <- anno.major %>% 
  .[!is.na(.) & (. %in% c("Microglia", "PVMs"))] %>% 
  factor()

idx <- intersect(anno.donor %>% names(), anno.subtype %>% names())

anno.donor %<>% .[idx]
anno.subtype %<>% .[idx] %>% 
  {`names<-`(as.character(.), names(.))}

anno.final <- paste0(anno.donor,"!!",anno.subtype) %>% 
  `names<-`(anno.donor %>% names())

# Create pseudo CM
cm.pseudo <- sccore::collapseCellsByType(cm.merged %>% Matrix::t(), 
                                         groups = anno.final, min.cell.count = 1) %>% 
  t() %>%
  apply(2, as, "integer")

cm.pseudo %<>% 
  {. + 1} %>% 
  DESeq2::DESeqDataSetFromMatrix(., 
                                 colnames(.) %>% 
                                   strsplit("_") %>% 
                                   sget(1) %>% 
                                   data.frame() %>% 
                                   `dimnames<-`(list(colnames(cm.pseudo), "group")), 
                                 design = ~ group) %>% 
  DESeq2::estimateSizeFactors() %>% 
  DESeq2::counts(normalized = T)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
cm.pseudo %<>% 
  Matrix::t() %>% 
  scale() %>%
  Matrix::t()

ord <- cm.pseudo %>% 
  colnames() %>% 
  {data.frame(id = .)} %>% 
  mutate(., 
         ct = strsplit(id, "!!") %>% 
           sget(2),
         condition = strsplit(id, "!!|_") %>% 
           sget(1)) %>% 
  arrange(ct, condition) %>% 
  pull(id)

cm.pseudo %<>% 
  .[, match(ord, colnames(.))]

cm.pseudo %<>% 
  as.data.frame() %>% 
  tibble::rownames_to_column(var = "gene") %>% 
  reshape2::melt(id.vars = c("gene")) %>% 
  mutate(variable = as.character(variable),
         condition = strsplit(variable, "!!|_") %>% 
           sget(1),
         ct = strsplit(variable, "!!") %>% 
           sget(2)) %>% 
  group_by(condition, ct, gene) %>% 
  summarize(m = mean(value)) %>% 
  ungroup() %>% 
  mutate(variable = paste(condition, ct, sep = "!!")) %>% 
  select(-condition, -ct) %>% 
  reshape2::dcast(gene ~ variable, value.var = "m") %>% 
  tibble::column_to_rownames(var = "gene")
## `summarise()` has grouped output by 'condition', 'ct'. You can override using
## the `.groups` argument.
tmp <- cm.pseudo %>% 
  colnames() %>% 
  strsplit("_|!!")

clusters <- tmp %>%
  sget(2)

condition <- tmp %>% 
  sget(1)

ha <- ComplexHeatmap::HeatmapAnnotation(Annotation = clusters,
                                        Condition = condition,
                                        col = list(Annotation = c("Microglia" = unname(pal.major["Microglia"]), 
                                                                  "PVMs" = unname(pal.major["PVMs"])),
                                                   Condition = c("CTRL" = unname(pal.major["CTRL"]), 
                                                                 "MSA" = unname(pal.major["MSA"]),
                                                                 "PD" = unname(pal.major["PD"]))
                                        ))

ComplexHeatmap::Heatmap(cm.pseudo,
                        name = "Expression", 
                        show_column_names = F, 
                        show_row_names = T, 
                        cluster_columns = F, 
                        show_column_dend = F, 
                        cluster_rows = T, 
                        top_annotation = ha, 
                        show_row_dend = F, 
                        column_split = colnames(cm.pseudo) %>% strsplit("!!|_") %>% sget(2) %>% unname() %>% factor(),
                        col=circlize::colorRamp2(c(-1, 1.2), c("white", "firebrick")),
                        row_km = 4)
## Warning: The input is a data frame-like object, convert it to a matrix.

Extended Data Figure 17

17c

c("FOSL2", "TCF4", "STAT1", "NR3C1", "ETV7", "THRB", "BPTF", "RELB", "CEBPD", "HIF1A", "ELF1", "CREM", "ELF2", "ZNF44", "CHD1", "POU2F1", "GTF2IRD1", "NFIA", "NFE2L2", "FOXP1", "FOXO3") %>% 
  clusterProfiler::enrichGO("org.Hs.eg.db", "SYMBOL", "BP") %>% 
  enrichplot::pairwise_termsim() %>% 
  enrichplot::treeplot() +
  scale_fill_manual(values = brewer.pal(6, "Greys")[-1])
## 
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## ! # Invaild edge matrix for <phylo>. A <tbl_df> is returned.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

Extended Data Figure 18

18a

We provide the results from scDRS based on Nalls et al. GWAS summary stats. For calculation of these, see scDRS_PD.ipynb.

dat.scores <- qread("gwas/nalls/PD.full_score.qs")

# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/nalls/downstream_celltype.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  .[c(1:5,69:72, # Header
      8:12,75:78, # Astro
      15:19,81:84, # EN
      21:25,86:89, # Immune
      28:32,92:95, # IN
      34:38,97:100, # MSN
      40:44,102:105, # MIC
      46:50,107:110, # OPCs
      52:56,112:115, # OL
      58:62,117:120, # PVMs
      64:68,122:125 # Peri
  )]

dat.sign <- dat.ct %>%
  split(seq(9)) %>% 
  bind_rows() %>% 
  {`colnames<-`(.[-1, ], .[1, ])} %>% 
  as.data.frame()

dat.sign %<>%  mutate(group = c("Astrocytes",
                                "Exc. neurons",
                                "Immune",
                                "Inh. neurons",
                                "MSN",
                                "Microglia",
                                "OPCs",
                                "Oligodendrocytes",
                                "PVMs",
                                "Pericytes/endothelial"))

dat.sign %<>% 
  filter(!group == "Unknown") %>% 
  mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
         hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>% 
  mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
         hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>% 
  mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>% 
  select(group, sign) %>% 
  dplyr::rename(type = group)

data.frame(cell = dat.scores$X,
           score = dat.scores$norm_score,
           type = anno.major[match(dat.scores$X,
                                   names(anno.major))]) %>%
  group_by(type) %>%
  summarize(m = median(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %$% 
  arrange(., desc(m)) %>%
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  ggplot(aes(type, m, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = -0.5) +
  theme_bw() + 
  labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
        line = element_blank()) +
  ylim(c(-0.6, 2)) +
  scale_fill_manual(values = pal.major) +
  geom_hline(yintercept = 0)

18b

dat.ct <- read.delim("gwas/nalls/downstream_celltype_microglia.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  gsub("group", "", .) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  {c("group", .[seq(35)])} %>% 
  matrix(ncol = 6, byrow = T) %>% 
  as.data.frame() %>% 
  `colnames<-`(., .[1, ]) %>% 
  .[-1, ]

dat.sign <- 
  dat.ct %>% 
  filter(!group == "Unknown") %>% 
  mutate(p.adj = p.adjust(assoc_mcp, method = "fdr"),
         hetero.adj = p.adjust(hetero_mcp, method = "fdr")) %>% 
  mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
         hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>% 
  mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(", ", "", .)) %>% 
  select(group, sign) %>% 
  dplyr::rename(type = group)

p <- data.frame(cell = dat.scores$X %>% gsub("one_", "!!", .),
           score = dat.scores$norm_score,
           type = anno.major[match(dat.scores$X,
                                   names(anno.major))]) %>%
  filter(cell %in% names(anno.micro)) %>% 
  mutate(type = factor(anno.micro[.$cell])) %>% 
  group_by(type) %>%
  summarize(m = mean(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %>% 
  arrange(desc(m)) %>% 
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  mutate(type = factor(type, levels = type)) %>% 
  ggplot(aes(type, m, fill = type)) + 
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = -0.5, nudge_y = -0.05) +
  theme_bw() + 
  theme(line = element_blank()) +
  labs(y = "Mean score", x = "", title = "scDRS for microglia") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  ylim(c(0, 2)) +
  scale_fill_manual(values = pal.major)

p

18c

We provide the results from scDRS based on Chia et al. GWAS summary stats. For calculation of these, see scDRS_MSA.ipynb.

dat.scores <- qread("gwas/chia/MSA.full_score.qs")

# Load downstream analyses for cell types versus trait
dat.ct <- read.delim("gwas/chia/downstream_celltype.tsv", header = F, sep = "\t") %$%  
  strsplit(V1, " ") %>% 
  unlist() %>% 
  gsub("\\", "", ., fixed = T) %>% 
  gsub("\n", "", ., fixed = T) %>% 
  .[!sapply(., \(x) x == "")] %>% 
  .[c(1:5,69:72, # Header
      8:12,75:78, # Astro
      15:19,81:84, # EN
      21:25,86:89, # Immune
      28:32,92:95, # IN
      34:38,97:100, # MSN
      40:44,102:105, # MIC
      46:50,107:110, # OPCs
      52:56,112:115, # OL
      58:62,117:120, # PVMs
      64:68,122:125 # Peri
  )]

dat.sign <- dat.ct %>%
  split(seq(9)) %>% 
  bind_rows() %>% 
  {`colnames<-`(.[-1, ], .[1, ])} %>% 
  as.data.frame()

dat.sign %<>%  mutate(group = c("Astrocytes",
                                "Exc. neurons",
                                "Immune",
                                "Inh. neurons",
                                "MSN",
                                "Microglia",
                                "OPCs",
                                "Oligodendrocytes",
                                "PVMs",
                                "Pericytes/endothelial"))

dat.sign %<>% 
  filter(!group == "Unknown") %>% 
  mutate(assoc_sign = sapply(assoc_mcp, \(x) if (x <= 0.001) "***" else if (x <= 0.01 && x > 0.001) "**" else if (x <= 0.05 && x > 0.01) "*" else ""),
         hetero_sign = sapply(hetero_mcp, \(x) if (x <= 0.001) "###" else if (x <= 0.01 && x > 0.001) "##" else if (x <= 0.05 && x > 0.01) "#" else " ")) %>% 
  mutate(sign = paste0(assoc_sign,",",hetero_sign) %>% gsub(",", "", .)) %>% 
  select(group, sign) %>% 
  dplyr::rename(type = group)

data.frame(cell = dat.scores$X,
           score = dat.scores$norm_score,
           type = anno.major[match(dat.scores$X,
                                   names(anno.major))]) %>%
  group_by(type) %>%
  summarize(m = median(score)) %>%
  filter(!is.na(type)) %>% 
  mutate(type = as.character(type)) %$% 
  arrange(., desc(m)) %>%
  mutate(type = factor(type, levels = type), sign = dat.sign$sign[match(type %>% levels, dat.sign$type)]) %>%
  ggplot(aes(type, m, fill = type)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sign), vjust = 1.5) +
  theme_bw() + 
  labs(y = "Mean score", x = "", title = "scDRS mean scores and associations") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        line = element_blank()) +
  ylim(c(-0.5, 0.5)) +
  scale_fill_manual(values = pal.major) +
  geom_hline(yintercept = 0)

Extended Data Figure 19

All subfigures were made in GraphPad Prism. Data are available in Phagocytosis_assay_triplicates_raw_data_HH.tsv (Copenhagen experiment) or HOUSTON.tsv (Houston experiment).

Extended Data Table 2

Not rerun

anno.major <- qread("anno_major.qs")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>% 
  {factor(c(., anno.neurons))} %>% 
  collapseAnnotation("MSN") %>% 
  collapseAnnotation("GABAergic") %>% 
  collapseAnnotation("GLUergic") %>% 
  renameAnnotation("MSN", "Medium spiny neurons") %>% 
  renameAnnotation("GABAergic", "GABAergic neurons") %>% 
  renameAnnotation("GLUergic", "GLUergic neurons") %>% 
  factor(levels = sort(levels(.)))
con.major$n.cores <- 32 # Major object
markers.major <- con.major$getDifferentialGenes(groups = anno.major, 
                                                z.threshold = 1, 
                                                upregulated.only = T, 
                                                append.specificity.metrics = T, 
                                                append.auc = T)

markers.major %>% 
  lapply(filter, PAdj <= 0.05) %>% 
  bind_rows(.id = "Celltype") %>% 
  write.table("Table SX - Celltype markers, major annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 4

Not rerun

res <- list(
  Major = list(CTRLvsMSA = "cao_major_msa.qs",
               CTRLvsPD = "cao_major_pd.qs",
               PDvsMSA = "cao_major_dis.qs"),
  Neurons = list(CTRLvsMSA = "cao_neurons_msa.qs",
                 CTRLvsPD = "cao_neurons_pd.qs",
                 PDvsMSA = "cao_neurons_dis.qs"),
  Glial = list(CTRLvsMSA = "cao_oligo_astro_opc_msa.qs",
               CTRLvsPD = "cao_oligo_astro_opc_pd.qs",
               PDvsMSA = "cao_oligo_astro_opc_dis.qs"),
  Micro_PVM = list(CTRLvsMSA = "cao_micro_pvm_msa.qs",
                   CTRLvsPD = "cao_micro_pvm_pd.qs",
                   PDvsMSA = "cao_micro_pvm_dis.qs")) %>% 
  lapply(\(celltype) sccore::plapply(celltype, \(comp) qread(comp, nthreads = 10)$test.results$coda, n.cores = 3)) %>% 
  lapply(lapply, \(x) data.frame(subtype = names(x$padj),
                                 loadings.min = apply(x$loadings, 1, min),
                                 loadings.median = apply(x$loadings, 1, median),
                                 loadings.max = apply(x$loadings, 1, max),
                                 padj = unname(x$padj))) %>% 
  lapply(bind_rows, .id = "Comparison") %>% 
  bind_rows(.id = "Cell type") %>% 
  `rownames<-`(NULL)

res %>% 
  write.table("Table SX - Loadings.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 5

Not rerun

cao <- Cacoa$new(data.object = con, # Major object
                 cell.groups = anno.major, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 sample.groups = con$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sget(1) %>%
                   {ifelse(. == "CTRL", "CTRL", "DISEASE")} %>% 
                   `names<-`(con$samples %>% 
                               names()),
                 sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))

cao$sample.groups <- con$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  `names<-`(con$samples %>% 
              names())

res <- cao$plotCellGroupSizes(show.significance = TRUE, 
                              filter.empty.cell.types = FALSE)$data %>% 
  group_by(group, variable) %>% 
  summarize(Min_proportion = min(value),
            Mean_proportion = mean(value),
            Max_proportion = max(value)) %>% 
  dplyr::rename(Condition = group,
                Celltype = variable)

res %>% 
  write.table("Table SX - Proportions, major annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 6

Not rerun

anno.major <- qread("anno_major.qs")

anno.micro <- qread("anno_micro_pvm.qs") %>% 
  renameAnnotation("Steady-state","MIC_steady-state") %>% 
  renameAnnotation("Intermediate1","MIC_intermediate1") %>% 
  renameAnnotation("Intermediate2","MIC_intermediate2") %>% 
  renameAnnotation("Activated","MIC_activated")

anno.glia <- c("anno_astro.qs",
               "anno_oligo.qs",
               "anno_opc.qs") %>% 
  lapply(qread) %>% 
  Reduce(c, .) %>% 
  factor() %>% 
  renameAnnotation("Homeostatic_astrocytes", "AS_homeostatic") %>% 
  renameAnnotation("Reactive_astrocytes", "AS_reactive") %>% 
  renameAnnotation("Homeostatic_LINC01608", "OL_LINC01608") %>%
  renameAnnotation("Homeostatic_SLC5A11", "OL_SLC5A11") %>% 
  renameAnnotation("Reactive_SCGZ", "OL_SGCZ")

anno.neurons <- qread("anno_neurons.qs")

# Minor final
anno.minor <- anno.major[!anno.major %in% c("Inh. neurons", "Exc. neurons", "PVMs", "Astrocytes", "Oligodendrocytes", "OPCs", "Microglia")] %>% 
  {factor(c(., anno.neurons, anno.glia, anno.micro))} %>% 
  factor(levels = sort(levels(.)))

# Major final
anno.major %<>% .[!names(.) %in% names(anno.neurons)] %>% 
  {factor(c(., anno.neurons))} %>% 
  collapseAnnotation("MSN") %>% 
  collapseAnnotation("GABAergic") %>% 
  collapseAnnotation("GLUergic") %>% 
  renameAnnotation("MSN", "Medium spiny neurons") %>% 
  renameAnnotation("GABAergic", "GABAergic neurons") %>% 
  renameAnnotation("GLUergic", "GLUergic neurons") %>% 
  factor(levels = sort(levels(.)))
markers.minor <- con.major$getDifferentialGenes(groups = anno.minor, 
                                                z.threshold = 1, 
                                                upregulated.only = T, 
                                                append.specificity.metrics = T, 
                                                append.auc = T)

markers.minor %>% 
  lapply(filter, PAdj <= 0.05) %>% 
  bind_rows(.id = "Celltype") %>% 
  write.table("Table SX - Celltype markers, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 7

Not rerun

cao <- Cacoa$new(data.object = con, 
                 cell.groups = anno.minor, 
                 ref.level = "CTRL", 
                 target.level = "DISEASE", 
                 sample.groups = con$samples %>% 
                   names() %>% 
                   strsplit("_") %>% 
                   sget(1) %>%
                   {ifelse(. == "CTRL", "CTRL", "DISEASE")} %>% 
                   `names<-`(con$samples %>% 
                               names()),
                 sample.groups.palette = c(c("yellow", RColorBrewer::brewer.pal(n = 6, "Accent")[5:6]) %>% setNames(c("CTRL","PD","MSA"))))

cao$sample.groups <- con$samples %>% 
  names() %>% 
  strsplit("_") %>% 
  sget(1) %>% 
  `names<-`(con$samples %>% 
              names())

res <- cao$plotCellGroupSizes(show.significance = TRUE, 
                              filter.empty.cell.types = FALSE)$data %>% 
  group_by(group, variable) %>% 
  summarize(Min_proportion = min(value),
            Mean_proportion = mean(value),
            Max_proportion = max(value)) %>% 
  dplyr::rename(Condition = group,
                Celltype = variable)

res %>% 
  write.table("Table SX - Proportions, minor annotation.tsv", sep = "\t", dec = ".", row.names = F)

Extended Data Table 8

Not rerun

First, correct OPCs/COPs for age covariate according to scITD calculations

cao_msa <- qread("cao_opc_msa.qs", nthreads = 10)
cao_pd <- qread("cao_opc_pd.qs", nthreads = 10)
cao_dis <- qread("cao_opc_dis.qs", nthreads = 10)

metadata.all <- read.delim("metadata.tsv")

meta.df.msa <- metadata.all %>% 
  filter(sample %in% names(cao_msa$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

meta.df.pd <- metadata.all %>% 
  filter(sample %in% names(cao_pd$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

meta.df.dis <- metadata.all %>% 
  filter(sample %in% names(cao_dis$data.object$samples)) %>% 
  tibble::column_to_rownames("sample") %>% 
  select(age)

all.genes_msa <- cao_msa$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_msa <- all.genes_msa %>% 
  .[grepl("MT-|RPL|RPS", .)]

all.genes_pd <- cao_pd$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_pd <- all.genes_pd %>% 
  .[grepl("MT-|RPL|RPS", .)]

all.genes_dis <- cao_dis$data.object$samples %>% 
  lget("misc") %>% 
  lget("rawCounts") %>% 
  lapply(colnames) %>% 
  Reduce(union, .)

genes.to.omit_dis <- all.genes_dis %>% 
  .[grepl("MT-|RPL|RPS", .)]

cao_msa$estimateDEPerCellType(covariates = meta.df.msa, name = "de_age", genes.to.omit = genes.to.omit_msa)
cao_pd$estimateDEPerCellType(covariates = meta.df.pd, name = "de_age", genes.to.omit = genes.to.omit_pd)
cao_dis$estimateDEPerCellType(covariates = meta.df.dis, name = "de_age", genes.to.omit = genes.to.omit_dis)

cao_msa$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_pd$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)
cao_dis$estimateOntology("GSEA", name = "gsea_age", de.name = "de_age", org.db = org.Hs.eg.db::org.Hs.eg.db)

qsave(cao_msa, "cao_opc_msa.qs", nthreads = 10)
qsave(cao_pd, "cao_opc_pd.qs", nthreads = 10)
qsave(cao_dis, "cao_opc_dis.qs", nthreads = 10)
go.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>% 
  lapply(qread, nthreads = 10) %>% 
  lapply(purrr::pluck, "test.results") %>% 
  lapply(\(x) if ("gsea_age" %in% names(x)) x$gsea_age else x$gsea) %>% # To include age-corrected calculations for OPCs
  setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])

go.list %>% 
  lget("res") %>% 
  lapply(lapply, lapply, slot, "result") %>% 
  lapply(lapply, data.table::rbindlist, idcol = "Category") %>% 
  lapply(data.table::rbindlist, idcol = "Celltype") %>% 
  data.table::rbindlist(idcol = "Comparison") %>% 
  mutate(Comparison = sapply(Comparison, \(comp) {
    if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
  })) %>% 
  filter(p.adjust <= 0.05) %>% 
  write.table("GSEA.csv", sep = ",", dec = ".", row.names = F)

Extended Data Table 9

Not rerun

de.list <- dir("cacoa/", full.names = T)[-c(4:7, 15:18, 22, 26)] %>% 
  lapply(qread, nthreads = 10) %>% 
  lapply(purrr::pluck, "test.results") %>% 
  lapply(\(x) if ("de_age" %in% names(x)) x$de_age else x$de) %>% # To include results from age-corrected calculations for OPCs
  setNames(dir("cacoa/")[-c(4:7, 15:18, 22, 26)])

de.list %>% 
  lapply(lget, "res") %>% 
  lapply(data.table::rbindlist, idcol = "Celltype") %>% 
  lapply(select, Celltype, baseMean, log2FoldChange, lfcSE, stat, pvalue, padj, Gene, Z, Za, CellFrac, SampleFrac) %>% 
  data.table::rbindlist(idcol = "Comparison") %>% 
  mutate(Comparison = sapply(Comparison, \(comp) {
    if (grepl("dis", comp)) "PD vs MSA" else if (grepl("msa", comp)) "CTRL vs MSA" else "CTRL vs PD"
  })) %>% 
  filter(padj <= 0.05) %>% 
  write.table("DEGs.csv", sep = ",", dec = ".", row.names = F)

Extended Data Table 10

Not rerun. We need the res.filter object from calculation of smoothed heatmap for microglia activation trajectory.

go <- clusterProfiler::enrichGO(names(res.filter), 
                                OrgDb = "org.Hs.eg.db", 
                                keyType = "SYMBOL", 
                                universe = colnames(con$getJointCountMatrix()))

go@result %>% 
  filter(pvalue <= 0.05) %>% 
  write.table("Microglia_pseudotime_activation-lineage_GO.tsv", sep = "\t", row.names = F)

Extended Data Table 11

Not rerun. We need the Smooth object for the microglia to PVM trajectory.

go.list <- list(repressed = rownames(Smooth)[1:which(rownames(Smooth) == "EPB41L2")],
                induced = rownames(Smooth)[(which(rownames(Smooth) == "EPB41L2")+1):nrow(Smooth)]) %>% 
  lapply(clusterProfiler::enrichGO,
         OrgDb = "org.Hs.eg.db", 
         keyType = "SYMBOL", 
         universe = colnames(con$getJointCountMatrix()))

go.list %>% 
  lapply(\(x) x@result) %>% 
  lapply(filter, pvalue <= 0.05) %>% 
  data.table::rbindlist(idcol = "geneset") %>% 
  write.table("Microglia_pseudotime_PVM-lineage_GO.tsv", sep = "\t", row.names = F)

Extended Data Table 14

Not rerun. We use the publicly available data from Rydbirk et al..

# Create universes
dat.all <- read.delim("CSF-Sv_Soluble-frac__Report.tsv")

universe.gene <- dat.all %>% 
  pull(PG.Genes) %>% 
  unique()

universe.prot <- dat.all %>% 
  pull(PG.UniProtIds)

# DEPs
dat <- read.delim("Soluble_fraction.txt")

# KEGG, MSA

kegg.res <- dat %>% 
  filter(Disease.group == "MSA") %>% 
  pull(UniProt.ID) %>% 
  clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")

# KEGG, PD

kegg.res.pd <- dat %>% 
  filter(Disease.group == "PD") %>% 
  pull(UniProt.ID) %>% 
  clusterProfiler::enrichKEGG(keyType = "uniprot", pvalueCutoff = 0.2, universe = universe.prot, organism = "hsa")

# Write table

list(MSA = kegg.res, PD = kegg.res.pd) %>% 
  lapply(getElement, "result") %>% 
  lapply(dplyr::select, Description, GeneRatio, BgRatio, pvalue, p.adjust, ID, geneID) %>% 
  bind_rows(.id = "Disease") %>% 
  write.table("TableS14_KEGG.tsv", sep = "\t", row.names = F)

Session info

Time to knit

Sys.time() - tt
## Time difference of 31.49379 mins
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.3 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /opt/intel/oneapi/mkl/2025.3/lib/libmkl_gf_lp64.so.2;  LAPACK version 3.12.1
## 
## Random number generation:
##  RNG:     L'Ecuyer-CMRG 
##  Normal:  Inversion 
##  Sample:  Rejection 
##  
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggraph_2.2.1                scITD_1.0.4                
##  [3] gamm4_0.2-7                 mgcv_1.9-3                 
##  [5] nlme_3.1-168                lme4_1.1-37                
##  [7] slingshot_2.16.0            TrajectoryUtils_1.16.1     
##  [9] SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [11] Biobase_2.68.0              GenomicRanges_1.60.0       
## [13] GenomeInfoDb_1.44.0         IRanges_2.42.0             
## [15] S4Vectors_0.46.0            BiocGenerics_0.54.0        
## [17] generics_0.1.4              MatrixGenerics_1.20.0      
## [19] matrixStats_1.5.0           princurve_2.1.6            
## [21] rstatix_0.7.2               stringr_1.5.1              
## [23] ComplexHeatmap_2.24.0       circlize_0.4.16            
## [25] ggrepel_0.9.6               CRMetrics_0.3.2            
## [27] RColorBrewer_1.1-3          ggpmisc_0.6.1              
## [29] ggpp_0.5.8-1                ggforce_0.4.2              
## [31] reshape2_1.4.4              STRINGdb_2.20.0            
## [33] ggpubr_0.6.0                cowplot_1.1.3              
## [35] ggsci_3.2.0                 ggplot2_3.5.2              
## [37] qs_0.27.3                   scHelper_0.0.5             
## [39] sccore_1.0.5                cacoa_0.4.0                
## [41] dplyr_1.1.4                 magrittr_2.0.3             
## [43] conos_1.5.2                 igraph_2.1.4               
## [45] Matrix_1.7-3               
## 
## loaded via a namespace (and not attached):
##   [1] rTensor_1.4.8             R.methodsS3_1.8.2        
##   [3] dichromat_2.0-0.1         Rmisc_1.5.1              
##   [5] Biostrings_2.76.0         vctrs_0.6.5              
##   [7] ggtangle_0.0.6            RApiSerialize_0.1.4      
##   [9] digest_0.6.37             png_0.1-8                
##  [11] shape_1.4.6.1             registry_0.5-1           
##  [13] combinat_0.0-8            magick_2.8.6             
##  [15] MASS_7.3-65               foreach_1.5.2            
##  [17] httpuv_1.6.16             qvalue_2.40.0            
##  [19] withr_3.0.2               ggrastr_1.0.2            
##  [21] psych_2.5.3               xfun_0.52                
##  [23] ggfun_0.1.8               survival_3.8-3           
##  [25] memoise_2.0.1             ggbeeswarm_0.7.2         
##  [27] clusterProfiler_4.16.0    MatrixModels_0.5-4       
##  [29] gson_0.1.0                tidytree_0.4.6           
##  [31] GlobalOptions_0.1.2       gtools_3.9.5             
##  [33] pbapply_1.7-2             R.oo_1.27.1              
##  [35] Formula_1.2-5             KEGGREST_1.48.0          
##  [37] promises_1.3.2            httr_1.4.7               
##  [39] hash_2.2.6.3              stringfish_0.16.0        
##  [41] rstudioapi_0.17.1         UCSC.utils_1.4.0         
##  [43] DOSE_4.2.0                polyclip_1.10-7          
##  [45] ca_0.71.1                 TSCAN_1.46.0             
##  [47] GenomeInfoDbData_1.2.14   quadprog_1.5-8           
##  [49] SparseArray_1.8.0         xtable_1.8-4             
##  [51] doParallel_1.0.17         evaluate_1.0.3           
##  [53] S4Arrays_1.8.0            irlba_2.3.5.1            
##  [55] colorspace_2.1-1          polynom_1.4-1            
##  [57] later_1.4.2               viridis_0.6.5            
##  [59] ggtree_3.16.0             lattice_0.22-7           
##  [61] SparseM_1.84-2            triebeard_0.4.1          
##  [63] pillar_1.10.2             iterators_1.0.14         
##  [65] caTools_1.18.3            compiler_4.5.2           
##  [67] stringi_1.8.7             TSP_1.2-4                
##  [69] minqa_1.2.8               plyr_1.8.9               
##  [71] drat_0.2.5                crayon_1.5.3             
##  [73] abind_1.4-8               gridGraphics_0.5-1       
##  [75] chron_2.3-62              locfit_1.5-9.12          
##  [77] graphlayouts_1.2.2        org.Hs.eg.db_3.21.0      
##  [79] bit_4.6.0                 fastmatch_1.1-6          
##  [81] tradeSeq_1.22.0           codetools_0.2-20         
##  [83] bslib_0.9.0               GetoptLong_1.0.5         
##  [85] mime_0.13                 splines_4.5.2            
##  [87] Rcpp_1.0.14               quantreg_6.1             
##  [89] sparseMatrixStats_1.20.0  pagoda2_1.0.12           
##  [91] brew_1.0-10               N2R_1.0.3                
##  [93] knitr_1.50                blob_1.2.4               
##  [95] utf8_1.2.5                clue_0.3-66              
##  [97] fs_1.6.6                  confintr_1.0.2           
##  [99] DelayedMatrixStats_1.30.0 Rdpack_2.6.4             
## [101] ggsignif_0.6.4            ggplotify_0.1.2          
## [103] tibble_3.2.1              sqldf_0.4-11             
## [105] statmod_1.5.0             tweenr_2.0.3             
## [107] pkgconfig_2.0.3           tools_4.5.2              
## [109] Rook_1.2                  cachem_1.1.0             
## [111] rbibutils_2.3             RSQLite_2.3.11           
## [113] viridisLite_0.4.2         DBI_1.2.3                
## [115] fastmap_1.2.0             rmarkdown_2.29           
## [117] scales_1.4.0              pbmcapply_1.5.1          
## [119] ica_1.0-3                 broom_1.0.8              
## [121] sass_0.4.10               patchwork_1.3.0          
## [123] carData_3.0-5             farver_2.1.2             
## [125] reformulas_0.4.1          tidygraph_1.3.1          
## [127] gsubfn_0.7                yaml_2.3.10              
## [129] cli_3.6.5                 purrr_1.0.4              
## [131] lifecycle_1.0.4           uwot_0.2.3               
## [133] backports_1.5.0           BiocParallel_1.42.0      
## [135] gtable_0.3.6              rjson_0.2.23             
## [137] parallel_4.5.2            ape_5.8-1                
## [139] SnowballC_0.7.1           limma_3.64.0             
## [141] jsonlite_2.0.0            edgeR_4.6.2              
## [143] seriation_1.5.7           bitops_1.0-9             
## [145] bit64_4.6.0-1             Rtsne_0.17               
## [147] yulab.utils_0.2.0         coda.base_1.0.0          
## [149] proto_1.0.0               urltools_1.7.3           
## [151] RcppParallel_5.1.10       jquerylib_0.1.4          
## [153] GOSemSim_2.34.0           R.utils_2.13.0           
## [155] lazyeval_0.2.2            shiny_1.10.0             
## [157] htmltools_0.5.8.1         enrichplot_1.28.2        
## [159] GO.db_3.21.0              glue_1.8.0               
## [161] XVector_0.48.0            treeio_1.32.0            
## [163] mclust_6.1.1              dunn.test_1.3.6          
## [165] mnormt_2.1.1              RMTstat_0.3.1            
## [167] gridExtra_2.3             boot_1.3-32              
## [169] R6_2.6.1                  tidyr_1.3.1              
## [171] DESeq2_1.48.1             gplots_3.2.0             
## [173] labeling_0.4.3            cluster_2.1.8.1          
## [175] FSA_0.10.0                aplot_0.2.5              
## [177] nloptr_2.2.1              DelayedArray_0.34.1      
## [179] tidyselect_1.2.1          vipor_0.4.7              
## [181] plotrix_3.8-4             dendsort_0.3.4           
## [183] car_3.1-3                 AnnotationDbi_1.70.0     
## [185] leidenAlg_1.1.5           fastICA_1.2-7            
## [187] KernSmooth_2.23-26        data.table_1.17.2        
## [189] fgsea_1.34.0              rlang_1.1.6              
## [191] lsa_0.73.3                ggnewscale_0.5.1         
## [193] Cairo_1.6-2               beeswarm_0.4.0